def test_combine_multiple(self): key1 = CacheKey(id="1", hash="a") key2 = CacheKey(id="2", hash="b") combined_key = CacheKey.combine_cache_keys([key1, key2]) self.assertNotEqual(key1, combined_key) self.assertNotEqual(key2, combined_key) self.assertEqual(combined_key, CacheKey.combine_cache_keys([key1, key2]))
def do_test_artifact_cache(self, artifact_cache): key = CacheKey("muppet_key", "fake_hash") with self.setup_test_file(artifact_cache.artifact_root) as path: # Cache it. self.assertFalse(artifact_cache.has(key)) self.assertFalse(bool(artifact_cache.use_cached_files(key))) artifact_cache.insert(key, [path]) self.assertTrue(artifact_cache.has(key)) # Stomp it. with open(path, "wb") as outfile: outfile.write(TEST_CONTENT2) # Recover it from the cache. self.assertTrue(bool(artifact_cache.use_cached_files(key))) # Check that it was recovered correctly. extracted_file_path = os.path.join( artifact_cache.artifact_extraction_root, os.path.basename(path)) with open(extracted_file_path, "rb") as infile: content = infile.read() self.assertEqual(content, TEST_CONTENT1) # Delete it. artifact_cache.delete(key) self.assertFalse(artifact_cache.has(key))
def execute():
    for target in targets:
        target_workdir = target_workdirs[target]
        task.execute_codegen(target, target_workdir)
        task._handle_duplicate_sources(target, target_workdir)
        fingerprint = CacheKey("test", target.invalidation_hash())
        syn_targets.append(task._inject_synthetic_target(target, target_workdir, fingerprint))

def test_successful_request_cleans_result_dir(self):
    key = CacheKey('muppet_key', 'fake_hash')
    with self.setup_local_cache() as cache:
        self._do_test_successful_request_cleans_result_dir(cache, key)
    with self.setup_rest_cache() as cache:
        self._do_test_successful_request_cleans_result_dir(cache, key)

def test_successful_request_calls_hit_callback(self):
    context = create_context()
    key = CacheKey('muppet_key', 'fake_hash', 42)
    with self.setup_local_cache() as cache:
        self._do_test_successful_request_runs_callback(cache, context, key)
    with self.setup_rest_cache() as cache:
        self._do_test_successful_request_runs_callback(cache, context, key)

def test_failed_multiproc(self):
    key = CacheKey('muppet_key', 'fake_hash')
    # Failed requests should return failure status, but not raise exceptions.
    with self.setup_rest_cache(return_failed=True) as cache:
        self.assertFalse(call_use_cached_files((cache, key, None)))
        with self.setup_test_file(cache.artifact_root) as path:
            call_insert((cache, key, [path], False))
        self.assertFalse(call_use_cached_files((cache, key, None)))

def test_failed_request_doesnt_call_hit_callback(self):
    context = create_context()
    key = CacheKey('muppet_key', 'fake_hash', 55)
    with self.setup_local_cache() as cache:
        self.assertEqual(
            context.subproc_map(call_use_cached_files, [(cache, key, raising_callback)]),
            [False])
    with self.setup_rest_cache() as cache:
        self.assertEqual(
            context.subproc_map(call_use_cached_files, [(cache, key, raising_callback)]),
            [False])

from functools import reduce  # Required on Python 3, where reduce is no longer a builtin.


def combine_cache_keys(cache_keys):
    if len(cache_keys) == 1:
        return cache_keys[0]
    else:
        sorted_cache_keys = sorted(cache_keys)  # For commutativity.
        combined_id = ','.join([cache_key.id for cache_key in sorted_cache_keys])
        combined_hash = ','.join([cache_key.hash for cache_key in sorted_cache_keys])
        combined_num_sources = reduce(
            lambda x, y: x + y,
            [cache_key.num_sources for cache_key in sorted_cache_keys],
            0)
        return CacheKey(combined_id, combined_hash, combined_num_sources)

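# A quick check of the commutativity that the sorted() call above provides. This is a
# minimal sketch, assuming the CacheKey(id, hash, num_sources) constructor and the
# CacheKey.combine_cache_keys entry point used elsewhere in these tests.
def example_combine_is_commutative():
    key_a = CacheKey('a', 'hash_a', 1)
    key_b = CacheKey('b', 'hash_b', 2)
    # Sorting before joining makes the combined key independent of argument order.
    assert (CacheKey.combine_cache_keys([key_a, key_b])
            == CacheKey.combine_cache_keys([key_b, key_a]))
    # A single key is returned as-is (the same object, not just an equal one).
    assert CacheKey.combine_cache_keys([key_a]) is key_a
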
def test_failed_request_doesnt_clean_result_dir(self):
    key = CacheKey('muppet_key', 'fake_hash')
    with temporary_dir() as results_dir:
        with temporary_file_path(root_dir=results_dir) as canary:
            with self.setup_local_cache() as cache:
                self.assertFalse(call_use_cached_files((cache, key, results_dir)))
                self.assertTrue(os.path.exists(canary))
            with self.setup_rest_cache() as cache:
                self.assertFalse(call_use_cached_files((cache, key, results_dir)))
                self.assertTrue(os.path.exists(canary))

def test_equality(self): self.assertEqual(CacheKey(id="1", hash="a"), CacheKey(id="1", hash="a")) self.assertEqual(CacheKey.uncacheable(id="1"), CacheKey.uncacheable(id="1")) self.assertNotEqual(CacheKey(id="1", hash="a"), CacheKey(id="2", hash="a")) self.assertNotEqual(CacheKey.uncacheable(id="1"), CacheKey.uncacheable(id="2")) self.assertNotEqual(CacheKey(id="1", hash="a"), CacheKey(id="1", hash="b"))
def test_failed_multiproc(self):
    context = create_context()
    key = CacheKey('muppet_key', 'fake_hash', 55)
    # Failed requests should return failure status, but not raise exceptions.
    with self.setup_rest_cache(return_failed=True) as cache:
        self.assertFalse(context.subproc_map(call_use_cached_files, [(cache, key, None)])[0])
        with self.setup_test_file(cache.artifact_root) as path:
            context.subproc_map(call_insert, [(cache, key, [path], False)])
        self.assertFalse(context.subproc_map(call_use_cached_files, [(cache, key, None)])[0])

def test_multiproc(self):
    key = CacheKey('muppet_key', 'fake_hash')
    with self.setup_local_cache() as cache:
        self.assertFalse(call_use_cached_files((cache, key, None)))
        with self.setup_test_file(cache.artifact_root) as path:
            call_insert((cache, key, [path], False))
        self.assertTrue(call_use_cached_files((cache, key, None)))
    with self.setup_rest_cache() as cache:
        self.assertFalse(call_use_cached_files((cache, key, None)))
        with self.setup_test_file(cache.artifact_root) as path:
            call_insert((cache, key, [path], False))
        self.assertTrue(call_use_cached_files((cache, key, None)))

def test_corrupted_cached_file_cleaned_up(self):
    key = CacheKey('muppet_key', 'fake_hash')
    with self.setup_local_cache() as artifact_cache:
        with self.setup_test_file(artifact_cache.artifact_root) as path:
            artifact_cache.insert(key, [path])
            tarfile = artifact_cache._cache_file_for_key(key)

            self.assertTrue(artifact_cache.use_cached_files(key))
            self.assertTrue(os.path.exists(tarfile))

            # Corrupt the cached tarball on disk.
            with open(tarfile, 'wb') as outfile:
                outfile.write(b'not a valid tgz any more')

            # A corrupt artifact should be treated as a miss and removed from the cache.
            self.assertFalse(artifact_cache.use_cached_files(key))
            self.assertFalse(os.path.exists(tarfile))

def test_local_backed_remote_cache(self):
    """Make sure that the combined cache finds what it should and that it backfills."""
    with self.setup_server() as server:
        with self.setup_local_cache() as local:
            tmp = TempLocalArtifactCache(local.artifact_root, local.artifact_extraction_root, 0)
            remote = RESTfulArtifactCache(local.artifact_root, BestUrlSelector([server.url]), tmp)
            combined = RESTfulArtifactCache(local.artifact_root, BestUrlSelector([server.url]), local)

            key = CacheKey("muppet_key", "fake_hash")

            with self.setup_test_file(local.artifact_root) as path:
                # No cache has the key.
                self.assertFalse(local.has(key))
                self.assertFalse(remote.has(key))
                self.assertFalse(combined.has(key))

                # No cache returns the key.
                self.assertFalse(bool(local.use_cached_files(key)))
                self.assertFalse(bool(remote.use_cached_files(key)))
                self.assertFalse(bool(combined.use_cached_files(key)))

                # Attempting to use a key that no cache has should not change anything.
                self.assertFalse(local.has(key))
                self.assertFalse(remote.has(key))
                self.assertFalse(combined.has(key))

                # Add to only the remote cache.
                remote.insert(key, [path])

                # After insertion to remote, remote and only remote should have the key.
                self.assertFalse(local.has(key))
                self.assertTrue(remote.has(key))
                self.assertTrue(combined.has(key))

                # Successfully using via remote should NOT change local.
                self.assertTrue(bool(remote.use_cached_files(key)))
                self.assertFalse(local.has(key))

                # Successfully using via combined SHOULD backfill local.
                self.assertTrue(bool(combined.use_cached_files(key)))
                self.assertTrue(local.has(key))
                self.assertTrue(bool(local.use_cached_files(key)))

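# The backfill verified above is the usual read-through pattern: a hit in the remote tier
# is written back to the local tier so the next read is served locally. A minimal,
# self-contained sketch of that pattern using hypothetical dict-like tiers (not the
# actual Pants cache API):
class ReadThroughCache:
    def __init__(self, local, remote):
        self._local = local    # Fast tier; may be missing entries.
        self._remote = remote  # Slow, authoritative tier.

    def get(self, key):
        if key in self._local:
            return self._local[key]
        if key in self._remote:
            value = self._remote[key]
            self._local[key] = value  # Backfill so subsequent reads hit the local tier.
            return value
        return None  # Miss in both tiers.
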
def test_max_retries_exceeded(self):
    key = CacheKey("muppet_key", "fake_hash")
    # Assert that the global "retries exceeded" flag is set when retries are exceeded.
    with self.override_check_for_max_retry(should_check=True), \
         self.setup_rest_cache(return_failed="connection-error") as cache, \
         self.captured_logging(logging.WARNING) as captured:
        self.assertFalse(call_use_cached_files((cache, key, None)))
        self.assertTrue(RequestsSession._max_retries_exceeded)
        _, retry_warning = tuple(captured.warnings())
        self.assertIn(
            "Maximum retries were exceeded for the current connection pool. Avoiding the "
            "remote cache for the rest of the pants process lifetime.",
            retry_warning,
        )

def test_multiproc(self):
    key = CacheKey('muppet_key', 'fake_hash', 42)
    with self.setup_local_cache() as cache:
        self.assertEqual(list(map(call_use_cached_files, [(cache, key, None)])), [False])
        with self.setup_test_file(cache.artifact_root) as path:
            # Wrap in list() so the lazy map actually runs the inserts on Python 3.
            list(map(call_insert, [(cache, key, [path], False)]))
        self.assertEqual(list(map(call_use_cached_files, [(cache, key, None)])), [True])
    with self.setup_rest_cache() as cache:
        self.assertEqual(list(map(call_use_cached_files, [(cache, key, None)])), [False])
        with self.setup_test_file(cache.artifact_root) as path:
            list(map(call_insert, [(cache, key, [path], False)]))
        self.assertEqual(list(map(call_use_cached_files, [(cache, key, None)])), [True])

def test_noops_after_max_retries_exceeded(self):
    key = CacheKey("muppet_key", "fake_hash")
    with self.setup_rest_cache() as cache:
        # Assert that the artifact doesn't exist, then insert it and check that it exists.
        self.assertFalse(call_use_cached_files((cache, key, None)))
        with self.setup_test_file(cache.artifact_root) as path:
            call_insert((cache, key, [path], False))
        self.assertTrue(call_use_cached_files((cache, key, None)))

        # No failed requests should have occurred yet, so no retries should have been triggered.
        self.assertFalse(RequestsSession._max_retries_exceeded)

        # Now assert that when max retries are exceeded, the cache returns 404s.
        with self.restore_max_retries_flag():
            RequestsSession._max_retries_exceeded = True
            self.assertFalse(call_use_cached_files((cache, key, None)))

        # After the flag is toggled back, the cache successfully finds the entry.
        self.assertTrue(call_use_cached_files((cache, key, None)))

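# The _max_retries_exceeded flag acts as a process-wide circuit breaker: once tripped,
# remote lookups report a miss without a network round-trip, which is what the test
# above asserts. A minimal sketch of such a guard; use_cached_files_guarded is a
# hypothetical helper, not the actual Pants code path:
def use_cached_files_guarded(cache, key):
    if RequestsSession._max_retries_exceeded:
        return False  # Avoid the remote cache for the rest of the process lifetime.
    return cache.use_cached_files(key)
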
def test_local_backed_remote_cache_corrupt_artifact(self):
    """Ensure that a combined cache clears outputs after a failure to extract an artifact."""
    with temporary_dir() as remote_cache_dir:
        with self.setup_server(cache_root=remote_cache_dir) as server:
            with self.setup_local_cache() as local:
                tmp = TempLocalArtifactCache(
                    local.artifact_root, local.artifact_extraction_root, compression=1)
                remote = RESTfulArtifactCache(local.artifact_root, BestUrlSelector([server.url]), tmp)
                combined = RESTfulArtifactCache(local.artifact_root, BestUrlSelector([server.url]), local)

                key = CacheKey("muppet_key", "fake_hash")

                results_dir = os.path.join(local.artifact_root, "a/sub/dir")
                safe_mkdir(results_dir)
                self.assertTrue(os.path.exists(results_dir))

                with self.setup_test_file(results_dir) as path:
                    # Add to only the remote cache.
                    remote.insert(key, [path])

                    # Corrupt the artifact in the remote storage.
                    self.assertEqual(server.corrupt_artifacts(r".*muppet_key.*"), 1)

                    # An attempt to read the corrupt artifact should fail.
                    self.assertFalse(combined.use_cached_files(key, results_dir=results_dir))

                    # The local artifact should not have been stored, and the results_dir
                    # should exist but be empty.
                    self.assertFalse(local.has(key))
                    self.assertTrue(os.path.exists(results_dir))
                    self.assertEqual(len(os.listdir(results_dir)), 0)

def do_test_artifact_cache(self, artifact_cache):
    key = CacheKey('muppet_key', 'fake_hash')
    with self.setup_test_file(artifact_cache.artifact_root) as path:
        # Cache it.
        self.assertFalse(artifact_cache.has(key))
        self.assertFalse(bool(artifact_cache.use_cached_files(key)))
        artifact_cache.insert(key, [path])
        self.assertTrue(artifact_cache.has(key))

        # Stomp it.
        with open(path, 'wb') as outfile:
            outfile.write(TEST_CONTENT2)

        # Recover it from the cache.
        self.assertTrue(bool(artifact_cache.use_cached_files(key)))

        # Check that it was recovered correctly.
        with open(path, 'rb') as infile:
            content = infile.read()
        self.assertEqual(content, TEST_CONTENT1)

        # Delete it.
        artifact_cache.delete(key)
        self.assertFalse(artifact_cache.has(key))

def cache_key(key_id=None, key_hash=None):
    return CacheKey(id=key_id or 'a.target', hash=key_hash or '42')

def test_combine_single(self):
    key = CacheKey(id='a', hash='b')
    self.assertIs(key, CacheKey.combine_cache_keys([key]))

def execute_antlr_test(self, expected_package, target_workdir_fun=None):
    target = self.get_antlr_target()
    context = self.create_context()
    task = self.prepare_execute(context)
    target_workdir_fun = target_workdir_fun or (lambda x: safe_mkdtemp(dir=x))
    # Do not use task.workdir here, because when we calculate the hash for the synthetic
    # target we need source paths that are persistent relative to the build root.
    target_workdir = target_workdir_fun(self.build_root)

    # Generate code, then create a synthetic target.
    task.execute_codegen(target, target_workdir)
    fingerprint = CacheKey("test", target.invalidation_hash())
    syn_target = task._inject_synthetic_target(target, target_workdir, fingerprint)

    actual_sources = [s for s in Fileset.rglobs('*.java', root=target_workdir)]
    expected_sources = syn_target.sources_relative_to_source_root()
    self.assertEqual(set(expected_sources), set(actual_sources))

    # Check that the synthetic target has a valid source root and that the generated
    # sources have the expected java package.
    def get_package(path):
        with open(path) as fp:
            for line in fp:
                match = self.PACKAGE_RE.match(line)
                if match:
                    return match.group('package_name')
        return None

    for source in syn_target.sources_relative_to_source_root():
        source_path = os.path.join(target_workdir, source)
        self.assertTrue(os.path.isfile(source_path),
                        "{0} is not the source root for {1}".format(target_workdir, source))
        self.assertEqual(expected_package, get_package(source_path))

    self.assertIn(syn_target, context.targets())

    # Check that the output file locations match the package.
    if expected_package is not None:
        expected_path_prefix = expected_package.replace('.', os.path.sep) + os.path.sep
        for source in syn_target.sources_relative_to_source_root():
            self.assertTrue(source.startswith(expected_path_prefix),
                            "{0} does not start with {1}".format(source, expected_path_prefix))

    # Check that empty directories have been removed.
    for root, dirs, files in os.walk(target_workdir):
        for d in dirs:
            full_dir = os.path.join(root, d)
            self.assertTrue(os.listdir(full_dir),
                            "Empty directories should have been removed ({0})".format(full_dir))

    return syn_target

def test_cacheable(self): self.assertTrue(CacheKey(id="1", hash="a").cacheable) self.assertFalse(CacheKey.uncacheable(id="1").cacheable)
def key_for(self, tid, sources):
    return CacheKey(tid, tid, len(sources))

def key_for_target(self, target, sources=None, transitive=False, fingerprint_strategy=None):
    # Ignores the sources, transitive, and fingerprint_strategy arguments and returns a
    # deterministic key derived solely from the target.
    return CacheKey(target.id, target.id, target.num_chunking_units)

def cache_key(cls, key_id=None, key_hash=None):
    return CacheKey(id=cls.ensure_key_id(key_id), hash=key_hash or '42')

def update_hash(cache_key, new_hash):
    return CacheKey(id=cache_key.id, hash=new_hash)

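# update_hash returns a new key instead of mutating, treating CacheKey as an immutable
# value object. A hypothetical usage sketch, assuming only the id/hash attributes used
# elsewhere in this section:
old_key = CacheKey(id='a.target', hash='42')
new_key = update_hash(old_key, 'deadbeef')
assert new_key.id == old_key.id and new_key.hash == 'deadbeef'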