Example #1
# Imports assumed from the surrounding module context (mozbuild-style,
# Python 2 era: urlparse is the py2 stdlib module). mkdir here is
# mozbuild's helper (it accepts not_indexed), not os.mkdir.
import binascii
import hashlib
import logging
import os
import urlparse

import mozpack.path as mozpath
from dlmanager import DownloadManager
from mozbuild.util import mkdir
# ArtifactPersistLimit is assumed to be defined elsewhere in this module.
class ArtifactCache(object):
    '''Fetch artifacts from URLs and purge least recently used artifacts from disk.'''
    def __init__(self, cache_dir, log=None, skip_cache=False):
        mkdir(cache_dir, not_indexed=True)
        self._cache_dir = cache_dir
        self._log = log
        self._skip_cache = skip_cache
        self._persist_limit = ArtifactPersistLimit(log)
        self._download_manager = DownloadManager(
            self._cache_dir, persist_limit=self._persist_limit)
        self._last_dl_update = -1

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def fetch(self, url, force=False):
        fname = os.path.basename(url)
        try:
            # Use the file name from the URL if it looks like a hex digest
            # (MD5, SHA-1, SHA-224, SHA-256, SHA-384 or SHA-512).
            if len(fname) not in (32, 40, 56, 64, 96, 128):
                raise TypeError()
            binascii.unhexlify(fname)
        except TypeError:
            # We download to a temporary name like HASH[:16]-basename to
            # differentiate among URLs with the same basenames.  We used to then
            # extract the build ID from the downloaded artifact and use it to make a
            # human readable unique name, but extracting build IDs is time consuming
            # (especially on Mac OS X, where we must mount a large DMG file).
            hash = hashlib.sha256(url).hexdigest()[:16]
            # Strip query string and fragments.
            basename = os.path.basename(urlparse.urlparse(url).path)
            fname = hash + '-' + basename

        path = os.path.abspath(mozpath.join(self._cache_dir, fname))
        if self._skip_cache and os.path.exists(path):
            self.log(
                logging.INFO, 'artifact', {'path': path},
                'Skipping cache: removing cached downloaded artifact {path}')
            os.remove(path)

        self.log(logging.INFO, 'artifact', {'path': path},
                 'Downloading to temporary location {path}')
        try:
            dl = self._download_manager.download(url, fname)

            def download_progress(dl, bytes_so_far, total_size):
                if not total_size:
                    return
                percent = (float(bytes_so_far) / total_size) * 100
                now = int(percent / 5)
                if now == self._last_dl_update:
                    return
                self._last_dl_update = now
                self.log(
                    logging.INFO, 'artifact', {
                        'bytes_so_far': bytes_so_far,
                        'total_size': total_size,
                        'percent': percent
                    }, 'Downloading... {percent:02.1f} %')

            if dl:
                dl.set_progress(download_progress)
                dl.wait()
            else:
                # Avoid the file being removed if it was in the cache already.
                path = os.path.join(self._cache_dir, fname)
                self._persist_limit.register_file(path)

            abspath = os.path.abspath(mozpath.join(self._cache_dir, fname))
            self.log(logging.INFO, 'artifact', {'path': abspath},
                     'Downloaded artifact to {path}')
            return abspath
        finally:
            # Cancel any background downloads in progress.
            self._download_manager.cancel()

    def clear_cache(self):
        if self._skip_cache:
            self.log(logging.INFO, 'artifact', {},
                     'Skipping cache: ignoring clear_cache!')
            return

        self._persist_limit.remove_all()
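
Usage sketch (not from the original source): a minimal, hypothetical driver
for the class above, assuming the imports resolve and using a placeholder
URL; the log callable mirrors the (level, action, params, format) shape
that ArtifactCache.log forwards.

import logging

def simple_log(level, action, params, format_str):
    # Hypothetical logger matching the signature fetch() calls with.
    print(format_str.format(**params))

cache = ArtifactCache('/tmp/artifact-cache', log=simple_log)
local_path = cache.fetch('https://example.com/builds/target.apk')  # placeholder URL
simple_log(logging.INFO, 'artifact', {'path': local_path}, 'Fetched to {path}')
cache.clear_cache()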
Example #2
# Imports assumed from the surrounding module context (Python 2 era).
import hashlib
import logging
import operator
import os

import mozpack.path as mozpath
from cachetools import cachedmethod
from dlmanager import DownloadManager, PersistLimit
# CacheManager, MAX_CACHED_ARTIFACTS and PROCESSED_SUFFIX are assumed to be
# defined elsewhere in this module.
class ArtifactCache(CacheManager):
    '''Fetch Task Cluster artifact URLs and purge least recently used artifacts from disk.'''

    def __init__(self, cache_dir, log=None, skip_cache=False):
        # TODO: instead of storing N artifact packages, store M megabytes.
        CacheManager.__init__(self, cache_dir, 'fetch', MAX_CACHED_ARTIFACTS,
                              cache_callback=self.delete_file,
                              log=log, skip_cache=skip_cache)
        self._cache_dir = cache_dir
        size_limit = 1024 * 1024 * 1024  # 1 GiB in bytes.
        file_limit = 4 # But always keep at least 4 old artifacts around.
        persist_limit = PersistLimit(size_limit, file_limit)
        self._download_manager = DownloadManager(self._cache_dir, persist_limit=persist_limit)
        self._last_dl_update = -1

    def delete_file(self, key, value):
        try:
            os.remove(value)
            self.log(logging.INFO, 'artifact',
                {'filename': value},
                'Purged artifact {filename}')
        except (OSError, IOError):
            pass

        try:
            os.remove(value + PROCESSED_SUFFIX)
            self.log(logging.INFO, 'artifact',
                {'filename': value + PROCESSED_SUFFIX},
                'Purged processed artifact {filename}')
        except (OSError, IOError):
            pass

    @cachedmethod(operator.attrgetter('_cache'))
    def fetch(self, url, force=False):
        # We download to a temporary name like HASH[:16]-basename to
        # differentiate among URLs with the same basenames.  We used to then
        # extract the build ID from the downloaded artifact and use it to make a
        # human readable unique name, but extracting build IDs is time consuming
        # (especially on Mac OS X, where we must mount a large DMG file).
        hash = hashlib.sha256(url).hexdigest()[:16]
        fname = hash + '-' + os.path.basename(url)

        path = os.path.abspath(mozpath.join(self._cache_dir, fname))
        if self._skip_cache and os.path.exists(path):
            self.log(logging.DEBUG, 'artifact',
                {'path': path},
                'Skipping cache: removing cached downloaded artifact {path}')
            os.remove(path)

        self.log(logging.INFO, 'artifact',
            {'path': path},
            'Downloading to temporary location {path}')
        try:
            dl = self._download_manager.download(url, fname)

            def download_progress(dl, bytes_so_far, total_size):
                # Guard against a zero or unknown total size before dividing
                # (the later revisions in Examples #1 and #3 do the same).
                if not total_size:
                    return
                percent = (float(bytes_so_far) / total_size) * 100
                now = int(percent / 5)
                if now == self._last_dl_update:
                    return
                self._last_dl_update = now
                self.log(logging.INFO, 'artifact',
                         {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent},
                         'Downloading... {percent:02.1f} %')

            if dl:
                dl.set_progress(download_progress)
                dl.wait()
            abspath = os.path.abspath(mozpath.join(self._cache_dir, fname))
            self.log(logging.INFO, 'artifact',
                {'path': abspath},
                'Downloaded artifact to {path}')
            return abspath
        finally:
            # Cancel any background downloads in progress.
            self._download_manager.cancel()

    def print_last_item(self, args, sorted_kwargs, result):
        url, = args
        self.log(logging.INFO, 'artifact',
            {'url': url},
            'Last installed binaries from url {url}')
        self.log(logging.INFO, 'artifact',
            {'filename': result},
            'Last installed binaries from local file {filename}')
        self.log(logging.INFO, 'artifact',
            {'filename': result + PROCESSED_SUFFIX},
            'Last installed binaries from local processed file {filename}')
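
Example #2 differs from the other two mainly in memoizing fetch() through
cachetools' cachedmethod, keyed on the URL, so a repeated fetch returns the
cached local path without touching the network. A standalone sketch of that
pattern (class and values are hypothetical; the LRU size stands in for
MAX_CACHED_ARTIFACTS):

import operator
from cachetools import LRUCache, cachedmethod

class FetchExample(object):
    def __init__(self):
        # Plays the role of the _cache that CacheManager provides above.
        self._cache = LRUCache(maxsize=4)

    @cachedmethod(operator.attrgetter('_cache'))
    def fetch(self, url):
        # Expensive download work would happen here, only on a cache miss.
        return 'local-path-for-' + url

f = FetchExample()
assert f.fetch('u1') is f.fetch('u1')  # second call is served from the LRU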
Example #3
# Imports assumed from the surrounding module context (Python 2 era).
# mkdir here is mozbuild's helper (it accepts not_indexed), not os.mkdir.
import binascii
import hashlib
import logging
import os

import mozpack.path as mozpath
from dlmanager import DownloadManager
from mozbuild.util import mkdir
# ArtifactPersistLimit is assumed to be defined elsewhere in this module.
class ArtifactCache(object):
    '''Fetch Task Cluster artifact URLs and purge least recently used artifacts from disk.'''

    def __init__(self, cache_dir, log=None, skip_cache=False):
        mkdir(cache_dir, not_indexed=True)
        self._cache_dir = cache_dir
        self._log = log
        self._skip_cache = skip_cache
        self._persist_limit = ArtifactPersistLimit(log)
        self._download_manager = DownloadManager(
            self._cache_dir, persist_limit=self._persist_limit)
        self._last_dl_update = -1

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def fetch(self, url, force=False):
        fname = os.path.basename(url)
        try:
            # Use the file name from the URL if it looks like a hex digest
            # (MD5, SHA-1, SHA-224, SHA-256, SHA-384 or SHA-512).
            if len(fname) not in (32, 40, 56, 64, 96, 128):
                raise TypeError()
            binascii.unhexlify(fname)
        except TypeError:
            # We download to a temporary name like HASH[:16]-basename to
            # differentiate among URLs with the same basenames.  We used to then
            # extract the build ID from the downloaded artifact and use it to make a
            # human readable unique name, but extracting build IDs is time consuming
            # (especially on Mac OS X, where we must mount a large DMG file).
            hash = hashlib.sha256(url).hexdigest()[:16]
            fname = hash + '-' + os.path.basename(url)

        path = os.path.abspath(mozpath.join(self._cache_dir, fname))
        if self._skip_cache and os.path.exists(path):
            self.log(logging.DEBUG, 'artifact',
                {'path': path},
                'Skipping cache: removing cached downloaded artifact {path}')
            os.remove(path)

        self.log(logging.INFO, 'artifact',
            {'path': path},
            'Downloading to temporary location {path}')
        try:
            dl = self._download_manager.download(url, fname)

            def download_progress(dl, bytes_so_far, total_size):
                if not total_size:
                    return
                percent = (float(bytes_so_far) / total_size) * 100
                now = int(percent / 5)
                if now == self._last_dl_update:
                    return
                self._last_dl_update = now
                self.log(logging.INFO, 'artifact',
                         {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent},
                         'Downloading... {percent:02.1f} %')

            if dl:
                dl.set_progress(download_progress)
                dl.wait()
            else:
                # Avoid the file being removed if it was in the cache already.
                path = os.path.join(self._cache_dir, fname)
                self._persist_limit.register_file(path)

            abspath = os.path.abspath(mozpath.join(self._cache_dir, fname))
            self.log(logging.INFO, 'artifact',
                {'path': abspath},
                'Downloaded artifact to {path}')
            return abspath
        finally:
            # Cancel any background downloads in progress.
            self._download_manager.cancel()

    def clear_cache(self):
        if self._skip_cache:
            self.log(logging.DEBUG, 'artifact',
                {},
                'Skipping cache: ignoring clear_cache!')
            return

        self._persist_limit.remove_all()
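
Examples #1 and #3 add one twist over #2: if the URL's basename already
looks like a hex digest, it is used verbatim instead of being prefixed with
a hash of the URL. An isolated sketch of that rule (the helper name is ours,
not the source's); the accepted lengths match hex digests of MD5 through
SHA-512:

import binascii

def looks_like_hex_digest(fname):
    # 32/40/56/64/96/128 hex chars: MD5, SHA-1, SHA-224, SHA-256,
    # SHA-384, SHA-512.
    if len(fname) not in (32, 40, 56, 64, 96, 128):
        return False
    try:
        binascii.unhexlify(fname)  # TypeError on py2, binascii.Error on py3
        return True
    except (TypeError, ValueError):
        return False

assert looks_like_hex_digest('a' * 64)       # e.g. a SHA-256 digest
assert not looks_like_hex_digest('target.apk')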