def test_local_backed_remote_cache_corrupt_artifact(self):
    """Ensure that a combined cache clears outputs after a failure to extract an artifact.

    An artifact is inserted through a remote cache backed by a temporary local
    cache, corrupted on the server, and then requested through a second remote
    cache backed by the real local cache. The read must fail, nothing may be
    stored in the local cache, and ``results_dir`` must still exist but be empty.
    """
    with temporary_dir() as remote_cache_dir:
        with self.setup_server(cache_root=remote_cache_dir) as server:
            with self.setup_local_cache() as local:
                tmp = TempLocalArtifactCache(local.artifact_root, compression=1)
                remote = RESTfulArtifactCache(
                    local.artifact_root, BestUrlSelector([server.url]), tmp)
                combined = RESTfulArtifactCache(
                    local.artifact_root, BestUrlSelector([server.url]), local)

                key = CacheKey('muppet_key', 'fake_hash')

                results_dir = os.path.join(local.artifact_root, 'a/sub/dir')
                safe_mkdir(results_dir)
                self.assertTrue(os.path.exists(results_dir))

                with self.setup_test_file(results_dir) as path:
                    # Add to only the remote cache.
                    remote.insert(key, [path])

                    # Corrupt the artifact in the remote storage. assertEqual reports the
                    # actual count on failure instead of a bare "False is not true".
                    self.assertEqual(server.corrupt_artifacts(r'.*muppet_key.*'), 1)

                    # An attempt to read the corrupt artifact should fail.
                    self.assertFalse(combined.use_cached_files(key, results_dir=results_dir))

                    # The local artifact should not have been stored, and the results_dir
                    # should exist, but be empty.
                    self.assertFalse(local.has(key))
                    self.assertTrue(os.path.exists(results_dir))
                    # Comparing against [] lists any leftover entries in the failure message.
                    self.assertEqual(os.listdir(results_dir), [])
def test_combined_cache(self):
    """Make sure that the combined cache finds what it should and that it backfills.

    A TCPServer using SimpleRESTHandler is started on a background thread on
    ('localhost', 0) and acts as the remote cache. A LocalArtifactCache and a
    RESTfulArtifactCache are wrapped in a CombinedArtifactCache, and the test
    checks that a key inserted only remotely is found by the combined cache and
    copied into the local cache once it has been used through the combined cache.
    """
    httpd = None
    httpd_thread = None
    try:
        with temporary_dir() as http_root:
            with temporary_dir() as cache_root:
                with pushd(http_root):  # SimpleRESTHandler serves from the cwd.
                    httpd = SocketServer.TCPServer(('localhost', 0), SimpleRESTHandler)
                    port = httpd.server_address[1]
                    httpd_thread = Thread(target=httpd.serve_forever)
                    httpd_thread.start()
                    with temporary_dir() as artifact_root:
                        local = LocalArtifactCache(None, artifact_root, cache_root)
                        remote = RESTfulArtifactCache(
                            MockLogger(), artifact_root, 'http://localhost:%d' % port)
                        combined = CombinedArtifactCache([local, remote])

                        key = CacheKey('muppet_key', 'fake_hash', 42)

                        with temporary_file(artifact_root) as f:
                            # Write the file.
                            f.write(TEST_CONTENT1)
                            path = f.name
                            f.close()

                            # No cache has key.
                            self.assertFalse(local.has(key))
                            self.assertFalse(remote.has(key))
                            self.assertFalse(combined.has(key))

                            # No cache returns key.
                            self.assertFalse(bool(local.use_cached_files(key)))
                            self.assertFalse(bool(remote.use_cached_files(key)))
                            self.assertFalse(bool(combined.use_cached_files(key)))

                            # Attempting to use key that no cache had should not change anything.
                            self.assertFalse(local.has(key))
                            self.assertFalse(remote.has(key))
                            self.assertFalse(combined.has(key))

                            # Add to only remote cache.
                            remote.insert(key, [path])

                            self.assertFalse(local.has(key))
                            self.assertTrue(remote.has(key))
                            self.assertTrue(combined.has(key))

                            # Successfully using via remote should NOT change local.
                            self.assertTrue(bool(remote.use_cached_files(key)))
                            self.assertFalse(local.has(key))

                            # Successfully using via combined SHOULD backfill local.
                            self.assertTrue(bool(combined.use_cached_files(key)))
                            self.assertTrue(local.has(key))
                            self.assertTrue(bool(local.use_cached_files(key)))
    finally:
        # Only ask the server loop to stop if the serving thread is actually running:
        # shutdown() waits for serve_forever() to finish, and join() raises on a thread
        # that was never started.
        if httpd_thread is not None and httpd_thread.is_alive():
            httpd.shutdown()
            httpd_thread.join()
        # shutdown() only stops the loop; server_close() releases the listening socket.
        if httpd is not None:
            httpd.server_close()
def setup_rest_cache(self, local=None, return_failed=False):
    """Yield a RESTfulArtifactCache that talks to a freshly set-up test server.

    A temporary directory is used as the artifact root. If ``local`` is falsy, a
    ``TempLocalArtifactCache(artifact_root, 0)`` is used as the backing local
    cache. ``return_failed`` is forwarded unchanged to ``self.setup_server``.

    NOTE(review): this is a generator function; it is presumably wrapped as a
    context manager by a decorator outside the visible text -- confirm.
    """
    with temporary_dir() as artifact_root:
        backing_cache = local if local else TempLocalArtifactCache(artifact_root, 0)
        with self.setup_server(return_failed=return_failed) as server:
            url_selector = BestUrlSelector([server.url])
            yield RESTfulArtifactCache(artifact_root, url_selector, backing_cache)
def create_artifact_cache(log, artifact_root, spec, task_name, action='using'):
    """Returns an artifact cache for the specified spec.

    spec can be:
      - a path to a file-based cache root.
      - a URL of a RESTful cache root.
      - a bar-separated list of URLs, where we'll pick the one with the best ping times.
      - A list of the above, for a combined cache.

    :param log: Logger; ``info`` and ``warn`` are called on it.
    :param artifact_root: Passed through to the created cache(s).
    :param spec: See above.
    :param task_name: Appended to the cache path/URL and used in log messages.
    :param action: A verb used only in log messages.
    :returns: A cache instance, or None when no remote URL is reachable, when every
      entry of a list spec resolves to no cache, or when spec is neither a string
      nor a list/tuple.
    :raises ValueError: If spec is empty, or is a string that is neither a path
      starting with '/' or '~' nor an http(s) URL.
    """
    if not spec:
        raise ValueError('Empty artifact cache spec')

    if isinstance(spec, basestring):
        if spec.startswith(('/', '~')):
            path = os.path.join(spec, task_name)
            log.info('%s %s local artifact cache at %s' % (task_name, action, path))
            return LocalArtifactCache(log, artifact_root, path)

        if spec.startswith(('http://', 'https://')):
            # Caches are supposed to be close, and we don't want to waste time pinging on
            # no-op builds. So we ping twice with a short timeout.
            pinger = Pinger(timeout=0.5, tries=2)
            best_url = select_best_url(spec, pinger, log)
            if not best_url:
                log.warn('%s has no reachable artifact cache in %s.' % (task_name, spec))
                return None
            url = best_url.rstrip('/') + '/' + task_name
            log.info('%s %s remote artifact cache at %s' % (task_name, action, url))
            return RESTfulArtifactCache(log, artifact_root, url)

        raise ValueError('Invalid artifact cache spec: %s' % spec)

    if isinstance(spec, (list, tuple)):
        # Build a real list rather than relying on filter(): on Python 3 filter() returns
        # a lazy iterator that is always truthy, which would defeat the emptiness check.
        caches = []
        for sub_spec in spec:
            cache = create_artifact_cache(log, artifact_root, sub_spec, task_name, action)
            if cache:
                caches.append(cache)
        return CombinedArtifactCache(caches) if caches else None

    # Any other spec type yields no cache (unchanged behavior, now explicit).
    return None
def test_restful_cache(self):
    """Exercise RESTfulArtifactCache: reject an ftp:// URL, then run the shared checks.

    Building the cache from a selector holding only 'ftp://localhost/bar' must
    raise InvalidRESTfulCacheProtoError. A cache obtained from
    ``self.setup_rest_cache()`` is then passed to ``self.do_test_artifact_cache``.
    """
    unsupported_urls = ['ftp://localhost/bar']
    with self.assertRaises(InvalidRESTfulCacheProtoError):
        RESTfulArtifactCache('foo', BestUrlSelector(unsupported_urls), 'foo')

    with self.setup_rest_cache() as artifact_cache:
        self.do_test_artifact_cache(artifact_cache)
def create_remote_cache(urls, local_cache):
    """Build a RESTfulArtifactCache for the best of ``urls``, or return None.

    Relies on ``self``, ``action``, ``artifact_root`` and ``compression`` from the
    enclosing scope. When ``self.select_best_url`` yields nothing truthy, no cache
    is created. When ``local_cache`` is falsy, a TempLocalArtifactCache is used
    as the backing local cache.
    """
    best_url = self.select_best_url(urls)
    if not best_url:
        return None

    url = best_url.rstrip('/') + '/' + self._stable_name
    message = '{0} {1} remote artifact cache at {2}'.format(self._stable_name, action, url)
    self._log.debug(message)

    backing_cache = local_cache or TempLocalArtifactCache(artifact_root, compression)
    return RESTfulArtifactCache(artifact_root, url, backing_cache)
def create_remote_cache(remote_spec, local_cache):
    """Build a RESTfulArtifactCache from a '|'-separated URL spec, or return None.

    Relies on ``self``, ``artifact_root`` and ``compression`` from the enclosing
    scope. The spec is split on '|' and passed to ``self.get_available_urls``.
    If that yields no URLs, None is returned. Otherwise each URL gets
    ``self._stable_name`` appended as a path segment and the resulting list is
    handed to a BestUrlSelector.
    """
    urls = self.get_available_urls(remote_spec.split('|'))
    if len(urls) == 0:
        return None

    cache_urls = []
    for url in urls:
        cache_urls.append('{}/{}'.format(url.rstrip('/'), self._stable_name))
    best_url_selector = BestUrlSelector(cache_urls)

    backing_cache = local_cache or TempLocalArtifactCache(artifact_root, compression)
    return RESTfulArtifactCache(artifact_root, best_url_selector, backing_cache)
def test_restful_cache_failover(self):
    """Check that the cache fails over from an unreachable URL to a working server.

    The selector is given a bad URL followed by the good server's URL, with
    ``max_failures=0``. The first run of the shared checks must raise
    NonfatalArtifactCacheError mentioning 'Failed to HEAD'; the second run must
    pass.
    """
    unreachable_url = 'http://badhost:123'
    with temporary_dir() as artifact_root:
        local_cache = TempLocalArtifactCache(artifact_root, 0)

        # With fail-over, rest call second time will succeed
        with self.setup_server() as good_server:
            selector = BestUrlSelector([unreachable_url, good_server.url], max_failures=0)
            artifact_cache = RESTfulArtifactCache(artifact_root, selector, local_cache)

            with self.assertRaises(NonfatalArtifactCacheError) as raised:
                self.do_test_artifact_cache(artifact_cache)
            self.assertIn('Failed to HEAD', str(raised.exception))

            self.do_test_artifact_cache(artifact_cache)
def create_remote_cache(remote_spec, local_cache):
    """Build a RESTfulArtifactCache with configured timeouts, or return None.

    Relies on ``self``, ``artifact_root`` and ``compression`` from the enclosing
    scope. The spec is split on "|" and passed to ``self.get_available_urls``.
    If that yields no URLs, None is returned. Otherwise each URL gets
    ``self._cache_dirname`` appended as a path segment. Read and write timeouts
    come from ``self._options``.
    """
    urls = self.get_available_urls(remote_spec.split("|"))
    if len(urls) == 0:
        return None

    cache_urls = []
    for url in urls:
        cache_urls.append("{}/{}".format(url.rstrip("/"), self._cache_dirname))
    best_url_selector = BestUrlSelector(cache_urls)

    backing_cache = local_cache or TempLocalArtifactCache(artifact_root, compression)
    return RESTfulArtifactCache(
        artifact_root,
        best_url_selector,
        backing_cache,
        read_timeout=self._options.read_timeout,
        write_timeout=self._options.write_timeout,
    )
def test_restful_cache(self):
    """Run the shared artifact-cache checks against a RESTfulArtifactCache.

    A TCPServer using SimpleRESTHandler is started on a background thread on
    ('localhost', 0) while the working directory is a temporary cache root, and
    the cache under test is pointed at the port the server reports.
    """
    httpd = None
    httpd_thread = None
    try:
        with temporary_dir() as cache_root:
            with pushd(cache_root):  # SimpleRESTHandler serves from the cwd.
                httpd = SocketServer.TCPServer(('localhost', 0), SimpleRESTHandler)
                port = httpd.server_address[1]
                httpd_thread = Thread(target=httpd.serve_forever)
                httpd_thread.start()
                with temporary_dir() as artifact_root:
                    artifact_cache = RESTfulArtifactCache(
                        MockLogger(), artifact_root, 'http://localhost:%d' % port)
                    self.do_test_artifact_cache(artifact_cache)
    finally:
        # Only ask the server loop to stop if the serving thread is actually running:
        # shutdown() waits for serve_forever() to finish, and join() raises on a thread
        # that was never started.
        if httpd_thread is not None and httpd_thread.is_alive():
            httpd.shutdown()
            httpd_thread.join()
        # shutdown() only stops the loop; server_close() releases the listening socket.
        if httpd is not None:
            httpd.server_close()
def test_local_backed_remote_cache_corrupt_artifact(self):
    """Ensure that a combined cache clears outputs after a failure to extract an artifact.

    An artifact is inserted through a remote cache backed by a temporary local
    cache, corrupted on the server, and then requested through a second remote
    cache backed by the real local cache. The read must fail, nothing may be
    stored in the local cache, and ``results_dir`` must still exist but be empty.
    """
    with temporary_dir() as remote_cache_dir:
        with self.setup_server(cache_root=remote_cache_dir) as server:
            with self.setup_local_cache() as local:
                tmp = TempLocalArtifactCache(
                    local.artifact_root, local.artifact_extraction_root, compression=1)
                remote = RESTfulArtifactCache(
                    local.artifact_root, BestUrlSelector([server.url]), tmp)
                combined = RESTfulArtifactCache(
                    local.artifact_root, BestUrlSelector([server.url]), local)

                key = CacheKey("muppet_key", "fake_hash")

                results_dir = os.path.join(local.artifact_root, "a/sub/dir")
                safe_mkdir(results_dir)
                self.assertTrue(os.path.exists(results_dir))

                with self.setup_test_file(results_dir) as path:
                    # Add to only the remote cache.
                    remote.insert(key, [path])

                    # Corrupt the artifact in the remote storage. assertEqual reports the
                    # actual count on failure instead of a bare "False is not true".
                    self.assertEqual(server.corrupt_artifacts(r".*muppet_key.*"), 1)

                    # An attempt to read the corrupt artifact should fail.
                    self.assertFalse(combined.use_cached_files(key, results_dir=results_dir))

                    # The local artifact should not have been stored, and the results_dir
                    # should exist, but be empty.
                    self.assertFalse(local.has(key))
                    self.assertTrue(os.path.exists(results_dir))
                    # Comparing against [] lists any leftover entries in the failure message.
                    self.assertEqual(os.listdir(results_dir), [])
def test_local_backed_remote_cache(self):
    """Check lookup and backfill for a remote cache backed by a local cache.

    ``remote`` is backed by a temporary local cache; ``combined`` is backed by
    the real ``local`` cache. A key inserted through ``remote`` must be visible
    through ``remote`` and ``combined`` but not ``local``, until it is used
    through ``combined``.
    """
    with self.setup_server() as server, self.setup_local_cache() as local:
        root = local.artifact_root
        tmp = TempLocalArtifactCache(root, 0)
        remote = RESTfulArtifactCache(root, BestUrlSelector([server.url]), tmp)
        combined = RESTfulArtifactCache(root, BestUrlSelector([server.url]), local)
        every_cache = (local, remote, combined)

        key = CacheKey('muppet_key', 'fake_hash')

        with self.setup_test_file(root) as path:
            # Nothing has been inserted yet: the key is absent everywhere.
            for cache in every_cache:
                self.assertFalse(cache.has(key))

            # ... and no cache can serve it.
            for cache in every_cache:
                self.assertFalse(bool(cache.use_cached_files(key)))

            # The failed lookups above must not have created any entry.
            for cache in every_cache:
                self.assertFalse(cache.has(key))

            # Insert through the remote cache only.
            remote.insert(key, [path])

            # Now remote (and combined, which shares the server) see the key; local does not.
            self.assertFalse(local.has(key))
            self.assertTrue(remote.has(key))
            self.assertTrue(combined.has(key))

            # Reading through remote leaves local untouched.
            self.assertTrue(bool(remote.use_cached_files(key)))
            self.assertFalse(local.has(key))

            # Reading through combined backfills local.
            self.assertTrue(bool(combined.use_cached_files(key)))
            self.assertTrue(local.has(key))
            self.assertTrue(bool(local.use_cached_files(key)))
def test_local_backed_remote_cache(self):
    """Check lookup and backfill for a remote cache backed by a local cache.

    ``remote`` is backed by a temporary local cache; ``combined`` is backed by
    the real ``local`` cache. A key inserted through ``remote`` must be visible
    through ``remote`` and ``combined`` but not ``local``, until it is used
    through ``combined``.
    """
    with self.setup_server() as url, self.setup_local_cache() as local:
        root = local.artifact_root
        tmp = TempLocalArtifactCache(root, 0)
        remote = RESTfulArtifactCache(root, BestUrlSelector([url]), tmp)
        combined = RESTfulArtifactCache(root, BestUrlSelector([url]), local)
        every_cache = (local, remote, combined)

        key = CacheKey('muppet_key', 'fake_hash')

        with self.setup_test_file(root) as path:
            # Nothing has been inserted yet: the key is absent everywhere.
            for cache in every_cache:
                self.assertFalse(cache.has(key))

            # ... and no cache can serve it.
            for cache in every_cache:
                self.assertFalse(bool(cache.use_cached_files(key)))

            # The failed lookups above must not have created any entry.
            for cache in every_cache:
                self.assertFalse(cache.has(key))

            # Insert through the remote cache only.
            remote.insert(key, [path])

            # Now remote (and combined, which shares the server) see the key; local does not.
            self.assertFalse(local.has(key))
            self.assertTrue(remote.has(key))
            self.assertTrue(combined.has(key))

            # Reading through remote leaves local untouched.
            self.assertTrue(bool(remote.use_cached_files(key)))
            self.assertFalse(local.has(key))

            # Reading through combined backfills local.
            self.assertTrue(bool(combined.use_cached_files(key)))
            self.assertTrue(local.has(key))
            self.assertTrue(bool(local.use_cached_files(key)))
def setup_rest_cache(self, local=None):
    """Yield a RESTfulArtifactCache that talks to a freshly set-up test server.

    A temporary directory is used as the artifact root. If ``local`` is falsy, a
    ``TempLocalArtifactCache(artifact_root, 0)`` is used as the backing local
    cache. The value produced by ``self.setup_server()`` is passed to the cache
    as its URL argument.

    NOTE(review): this is a generator function; it is presumably wrapped as a
    context manager by a decorator outside the visible text -- confirm.
    """
    with temporary_dir() as artifact_root:
        backing_cache = local if local else TempLocalArtifactCache(artifact_root, 0)
        with self.setup_server() as base_url:
            rest_cache = RESTfulArtifactCache(artifact_root, base_url, backing_cache)
            yield rest_cache
def create_artifact_cache(log, artifact_root, spec, task_name, compression,
                          action='using', local=None):
    """Returns an artifact cache for the specified spec.

    spec can be:
      - a path to a file-based cache root.
      - a URL of a RESTful cache root.
      - a bar-separated list of URLs, where we'll pick the one with the best ping times.
      - A list or tuple of two specs, local, then remote, each as described above

    :param log: context.log
    :param str artifact_root: The path under which cacheable products will be read/written.
    :param str spec: See above.
    :param str task_name: The name of the task using this cache (eg 'ScalaCompile')
    :param int compression: The gzip compression level for created artifacts.
                            Valid values are 1-9, or falsy to disable compression.
    :param str action: A verb, eg 'read' or 'write' for printed messages.
    :param LocalArtifactCache local: A local cache for use by created remote caches
    :returns: The created cache, or None when a remote spec has no reachable URL.
    :raises EmptyCacheSpecError: If spec is empty.
    :raises ValueError: If compression is truthy but not an integer.
    :raises CacheSpecFormatError: If a string spec is neither a path nor an http(s) URL.
    :raises LocalCacheSpecRequiredError: If the first of two specs is not a local cache.
    :raises RemoteCacheSpecRequiredError: If the second of two specs is not a remote spec.
    :raises InvalidCacheSpecError: For any other spec shape.
    """
    if not spec:
        raise EmptyCacheSpecError()
    if compression and not isinstance(compression, (int, long)):
        raise ValueError('compression value must be an integer: {comp}'.format(comp=compression))

    def recurse(new_spec, new_local=local):
        # Re-enter with the same settings, swapping only the spec and the backing local cache.
        return create_artifact_cache(log=log, artifact_root=artifact_root, spec=new_spec,
                                     task_name=task_name, compression=compression,
                                     action=action, local=new_local)

    def is_remote(candidate):
        return candidate.startswith('http://') or candidate.startswith('https://')

    if isinstance(spec, basestring):
        if spec.startswith('/') or spec.startswith('~'):
            path = os.path.join(spec, task_name)
            log.debug('{0} {1} local artifact cache at {2}'.format(task_name, action, path))
            return LocalArtifactCache(artifact_root, path, compression)
        elif is_remote(spec):
            # Caches are supposed to be close, and we don't want to waste time pinging on
            # no-op builds. So we ping twice with a short timeout.
            pinger = Pinger(timeout=0.5, tries=2)
            best_url = select_best_url(spec, pinger, log)
            if best_url:
                url = best_url.rstrip('/') + '/' + task_name
                log.debug('{0} {1} remote artifact cache at {2}'.format(task_name, action, url))
                # NOTE(review): compression is not forwarded to the temporary local cache
                # here -- confirm that is intended.
                local = local or TempLocalArtifactCache(artifact_root)
                return RESTfulArtifactCache(artifact_root, url, local)
            else:
                log.warn('{0} has no reachable artifact cache in {1}.'.format(task_name, spec))
                return None
        else:
            raise CacheSpecFormatError('Invalid artifact cache spec: {0}'.format(spec))
    # Lengths are compared with ==, not `is`: integer identity is an implementation detail.
    elif isinstance(spec, (list, tuple)) and len(spec) == 1:
        return recurse(spec[0])
    elif isinstance(spec, (list, tuple)) and len(spec) == 2:
        first = recurse(spec[0])
        if not isinstance(first, LocalArtifactCache):
            raise LocalCacheSpecRequiredError(
                'First of two cache specs must be a local cache path. Found: {0}'.format(spec[0]))
        if not is_remote(spec[1]):
            raise RemoteCacheSpecRequiredError(
                'Second of two cache specs must be a remote spec. Found: {0}'.format(spec[1]))
        return recurse(spec[1], new_local=first)
    else:
        raise InvalidCacheSpecError('Invalid artifact cache spec: {0}'.format(spec))