def use_cached_files(self, cache_key, results_dir=None):
    """Fetch and extract the artifact for `cache_key`, preferring the local cache.

    On a local-cache hit the local cache handles everything. Otherwise the artifact is
    downloaded from the remote cache and delegated to the local cache for storage and
    extraction.

    :param cache_key: key identifying the artifact.
    :param results_dir: optional directory passed through to the local cache.
    :returns: True on a hit, False on a miss, or an UnreadableArtifact wrapping the
        error if the artifact could not be read.
    """
    if self._localcache.has(cache_key):
        return self._localcache.use_cached_files(cache_key, results_dir)

    # The queue acts as a one-shot semaphore: a background thread blocks on queue.get
    # with a timeout and logs a warning if the download has not finished in time.
    queue = multiprocessing.Queue()
    try:
        response = self._request('GET', cache_key)
        if response is not None:
            threading.Thread(
                target=_log_if_no_response,
                args=(
                    60,  # Seconds to wait before warning about a slow download.
                    "\nStill downloading artifacts (either they're very large or the connection to the cache is slow)",
                    queue.get,
                )).start()
            # Delegate storage and extraction to local cache.
            byte_iter = response.iter_content(self.READ_SIZE_BYTES)
            res = self._localcache.store_and_use_artifact(cache_key, byte_iter, results_dir)
            queue.put(None)  # Unblock the watchdog thread so it exits quietly.
            return res
    except Exception as e:
        # NOTE: logger.warn is deprecated; logger.warning is the supported spelling.
        logger.warning('\nError while reading from remote artifact cache: {0}\n'.format(e))
        queue.put(None)  # Unblock the watchdog thread if it was started.
        # TODO(peiyu): clean up partially downloaded local file if any
        return UnreadableArtifact(cache_key, e)

    return False
def use_cached_files(self, cache_key):
    """Extract the locally cached artifact for `cache_key`, if present.

    :param cache_key: key identifying the artifact.
    :returns: True on a hit, False on a miss, or an UnreadableArtifact wrapping the
        error if the cached file could not be read.
    """
    try:
        # Renamed from `tarfile` to avoid shadowing the stdlib `tarfile` module.
        cache_file = self._cache_file_for_key(cache_key)
        if os.path.exists(cache_file):
            self._artifact(cache_file).extract()
            return True
    except Exception as e:
        # TODO(davidt): Consider being more granular in what is caught.
        # NOTE: logger.warn is deprecated; logger.warning is the supported spelling.
        logger.warning('Error while reading from local artifact cache: {0}'.format(e))
        return UnreadableArtifact(cache_key, e)
    return False
def use_cached_files(self, cache_key, hit_callback=None):
    """Extract the locally cached artifact for `cache_key`, if present.

    :param cache_key: key identifying the artifact.
    :param hit_callback: optional callable invoked with `cache_key` on a hit,
        before extraction.
    :returns: True on a hit, False on a miss, or an UnreadableArtifact wrapping the
        error if the artifact could not be read.
    """
    try:
        artifact = self._artifact_for(cache_key)
        if artifact.exists():
            if hit_callback:
                hit_callback(cache_key)
            artifact.extract()
            return True
    except Exception as e:
        # TODO(davidt): Consider being more granular in what is caught.
        # NOTE: logger.warn is deprecated; logger.warning is the supported spelling.
        logger.warning('Error while reading from local artifact cache: {0}'.format(e))
        return UnreadableArtifact(cache_key, e)
    return False
def use_cached_files(self, cache_key):
    """Fetch and extract the artifact for `cache_key`, preferring the local cache.

    On a local-cache miss the artifact is downloaded from the remote cache and
    delegated to the local cache for storage and extraction.

    :param cache_key: key identifying the artifact.
    :returns: True on a hit, False on a miss, or an UnreadableArtifact wrapping the
        error if the artifact could not be read.
    """
    if self._localcache.has(cache_key):
        return self._localcache.use_cached_files(cache_key)

    remote_path = self._remote_path_for_key(cache_key)
    try:
        response = self._request('GET', remote_path)
        if response is not None:
            # Delegate storage and extraction to local cache.
            byte_iter = response.iter_content(self.READ_SIZE_BYTES)
            return self._localcache.store_and_use_artifact(cache_key, byte_iter)
    except Exception as e:
        # NOTE: logger.warn is deprecated; logger.warning is the supported spelling.
        logger.warning('\nError while reading from remote artifact cache: {0}\n'.format(e))
        return UnreadableArtifact(cache_key, e)
    return False
def use_cached_files(self, cache_key, results_dir=None):
    """Fetch and extract the artifact for `cache_key`, preferring the local cache.

    On a local-cache miss the artifact is downloaded from the remote cache and
    delegated to the local cache for storage and extraction.

    :param cache_key: key identifying the artifact.
    :param results_dir: optional directory passed through to the local cache.
    :returns: True on a hit, False on a miss, or an UnreadableArtifact wrapping the
        error if the artifact could not be read.
    """
    if self._localcache.has(cache_key):
        return self._localcache.use_cached_files(cache_key, results_dir)

    try:
        response = self._request('GET', cache_key)
        if response is not None:
            # Delegate storage and extraction to local cache.
            byte_iter = response.iter_content(self.READ_SIZE_BYTES)
            return self._localcache.store_and_use_artifact(cache_key, byte_iter, results_dir)
    except Exception as e:
        # NOTE: logger.warn is deprecated; logger.warning is the supported spelling.
        logger.warning('\nError while reading from remote artifact cache: {0}\n'.format(e))
        # TODO(peiyu): clean up partially downloaded local file if any
        return UnreadableArtifact(cache_key, e)
    return False
def use_cached_files(self, cache_key, results_dir=None):
    """Extract the locally cached artifact for `cache_key`, if present.

    If `results_dir` is given, it is wiped before extraction so the extracted
    artifact fully replaces any stale results.

    :param cache_key: key identifying the artifact.
    :param results_dir: optional directory to clear before extracting.
    :returns: True on a hit, False on a miss, or an UnreadableArtifact wrapping the
        error if the artifact could not be read.
    """
    # Renamed from `tarfile` to avoid shadowing the stdlib `tarfile` module.
    cache_file = self._cache_file_for_key(cache_key)
    try:
        artifact = self._artifact_for(cache_key)
        if artifact.exists():
            if results_dir is not None:
                safe_rmtree(results_dir)
            artifact.extract()
            return True
    except Exception as e:
        # TODO(davidt): Consider being more granular in what is caught.
        # NOTE: logger.warn is deprecated; logger.warning is the supported spelling.
        logger.warning('Error while reading {0} from local artifact cache: {1}'.format(cache_file, e))
        # Delete the unreadable cache file so subsequent runs re-fetch it.
        safe_delete(cache_file)
        return UnreadableArtifact(cache_key, e)
    return False
def use_cached_files(self, cache_key, results_dir=None):
    """Fetch and extract the artifact for `cache_key`, preferring the local cache.

    On a local-cache miss the artifact is downloaded from the remote cache and
    delegated to the local cache for storage and extraction. A watchdog thread
    logs a warning if the download exceeds the configured slow-download timeout.

    :param cache_key: key identifying the artifact.
    :param results_dir: optional directory passed through to the local cache.
    :returns: True on a hit, False on a miss, or an UnreadableArtifact wrapping the
        error if the artifact could not be read.
    """
    if self._localcache.has(cache_key):
        return self._localcache.use_cached_files(cache_key, results_dir)

    # The queue is used as a semaphore here, containing only a single None element. A
    # background thread is kicked off which waits with the specified timeout for the single
    # queue element, and prints a warning message if the timeout is breached.
    done_signal = multiprocessing.Queue()
    try:
        response = self._request("GET", cache_key)
        if response is None:
            return False

        watchdog = threading.Thread(
            target=_log_if_no_response,
            args=(
                RequestsSession._instance().slow_download_timeout_seconds,
                "\nStill downloading artifacts (either they're very large or the connection to the cache is slow)",
                done_signal.get,
            ),
        )
        watchdog.start()

        # Delegate storage and extraction to local cache.
        content = response.iter_content(self.READ_SIZE_BYTES)
        stored = self._localcache.store_and_use_artifact(cache_key, content, results_dir)
        done_signal.put(None)
        return stored
    except Exception as e:
        logger.warning("\nError while reading from remote artifact cache: {0}\n".format(e))
        done_signal.put(None)
        # If we exceed the retry limits, set a global flag to avoid using the cache for the rest of
        # the pants process lifetime.
        if isinstance(e, MaxRetryError):
            logger.warning(
                "\nMaximum retries were exceeded for the current connection pool. Avoiding "
                "the remote cache for the rest of the pants process lifetime.\n"
            )
            RequestsSession._max_retries_exceeded = True
        # TODO(peiyu): clean up partially downloaded local file if any
        return UnreadableArtifact(cache_key, e)
def use_cached_files(self, cache_key, results_dir=None):
    """Fetch and extract the artifact for `cache_key` from S3, preferring the local cache.

    :param cache_key: key identifying the artifact.
    :param results_dir: optional directory passed through to the local cache.
    :returns: True on a hit, False on a miss or classified error, or an
        UnreadableArtifact for an unclassified read error.
    """
    logger.debug('GET {0}'.format(cache_key))
    if self._localcache.has(cache_key):
        return self._localcache.use_cached_files(cache_key, results_dir)

    obj = self._get_object(cache_key)
    try:
        fetched = obj.get()
    except Exception as e:
        _log_and_classify_error(e, 'GET', cache_key)
        return False

    # Delegate storage and extraction to local cache.
    try:
        body_iter = iter_content(fetched['Body'])
        return self._localcache.store_and_use_artifact(cache_key, body_iter, results_dir)
    except Exception as e:
        if _log_and_classify_error(e, 'GET', cache_key) == _UNKNOWN:
            return UnreadableArtifact(cache_key, e)
        return False
class ArtifactCacheStatsTest(TestBase):
    """Tests for ArtifactCacheStats hit/miss accounting and its on-disk hit/miss files."""

    TEST_CACHE_NAME_1 = 'ZincCompile'
    TEST_CACHE_NAME_2 = 'Checkstyle_test_checkstyle'
    TEST_LOCAL_ERROR = UnreadableArtifact('foo', ArtifactError('CRC check failed'))
    TEST_REMOTE_ERROR = UnreadableArtifact(
        'bar', NonfatalArtifactCacheError(requests.exceptions.ConnectionError('Read time out')))
    TEST_SPEC_A = 'src/scala/a'
    TEST_SPEC_B = 'src/scala/b'
    TEST_SPEC_C = 'src/java/c'

    def setUp(self):
        super().setUp()
        self.target_a = self.make_target(spec=self.TEST_SPEC_A)
        self.target_b = self.make_target(spec=self.TEST_SPEC_B)
        self.target_c = self.make_target(spec=self.TEST_SPEC_C)

    def test_add_hits(self):
        expected_stats = [
            {
                'cache_name': self.TEST_CACHE_NAME_2,
                'num_hits': 0,
                'num_misses': 1,
                'hits': [],
                'misses': [(self.TEST_SPEC_A, str(self.TEST_LOCAL_ERROR.err))]
            },
            {
                'cache_name': self.TEST_CACHE_NAME_1,
                'num_hits': 1,
                'num_misses': 1,
                'hits': [(self.TEST_SPEC_B, '')],
                'misses': [(self.TEST_SPEC_C, str(self.TEST_REMOTE_ERROR.err))]
            },
        ]
        expected_hit_or_miss_files = {
            '{}.misses'.format(self.TEST_CACHE_NAME_2):
                '{} {}\n'.format(self.TEST_SPEC_A, str(self.TEST_LOCAL_ERROR.err)),
            '{}.hits'.format(self.TEST_CACHE_NAME_1):
                '{}\n'.format(self.TEST_SPEC_B),
            '{}.misses'.format(self.TEST_CACHE_NAME_1):
                '{} {}\n'.format(self.TEST_SPEC_C, str(self.TEST_REMOTE_ERROR.err)),
        }
        with self.mock_artifact_cache_stats(expected_stats,
                                            expected_hit_or_miss_files=expected_hit_or_miss_files)\
                as artifact_cache_stats:
            artifact_cache_stats.add_hits(self.TEST_CACHE_NAME_1, [self.target_b])
            artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_1, [self.target_c],
                                            [self.TEST_REMOTE_ERROR])
            artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_2, [self.target_a],
                                            [self.TEST_LOCAL_ERROR])

    @contextmanager
    def mock_artifact_cache_stats(self, expected_stats, expected_hit_or_miss_files=None):
        """Yield an ArtifactCacheStats over a temp dir and verify it on exit.

        :param expected_stats: stats dicts expected from `get_all()` after the block runs.
        :param expected_hit_or_miss_files: optional mapping of expected hit/miss file names
            to their expected contents; when None, file verification is skipped.
        """
        with temporary_dir() as tmp_dir:
            artifact_cache_stats = ArtifactCacheStats(tmp_dir)
            yield artifact_cache_stats
            self.assertEqual(
                sorted(expected_stats, key=lambda s: s['cache_name']),
                sorted(artifact_cache_stats.get_all(), key=lambda s: s['cache_name']))
            # BUGFIX: the original dereferenced expected_hit_or_miss_files unconditionally,
            # raising AttributeError for callers relying on the documented None default.
            if expected_hit_or_miss_files is None:
                return
            self.assertEqual(sorted(list(expected_hit_or_miss_files.keys())),
                             sorted(os.listdir(tmp_dir)))
            for hit_or_miss_file in expected_hit_or_miss_files.keys():
                with open(os.path.join(tmp_dir, hit_or_miss_file), 'r') as hit_or_miss_saved:
                    self.assertEqual(expected_hit_or_miss_files[hit_or_miss_file],
                                     hit_or_miss_saved.read())
class ArtifactCacheStatsTest(TestBase):
    """Tests for ArtifactCacheStats hit/miss accounting and its on-disk hit/miss files."""

    TEST_CACHE_NAME_1 = "ZincCompile"
    TEST_CACHE_NAME_2 = "Checkstyle_test_checkstyle"
    TEST_LOCAL_ERROR = UnreadableArtifact("foo", ArtifactError("CRC check failed"))
    TEST_REMOTE_ERROR = UnreadableArtifact(
        "bar", NonfatalArtifactCacheError(requests.exceptions.ConnectionError("Read time out")))
    TEST_SPEC_A = "src/scala/a"
    TEST_SPEC_B = "src/scala/b"
    TEST_SPEC_C = "src/java/c"

    def setUp(self):
        super().setUp()
        self.target_a = self.make_target(spec=self.TEST_SPEC_A)
        self.target_b = self.make_target(spec=self.TEST_SPEC_B)
        self.target_c = self.make_target(spec=self.TEST_SPEC_C)

    def test_add_hits(self):
        expected_stats = [
            {
                "cache_name": self.TEST_CACHE_NAME_2,
                "num_hits": 0,
                "num_misses": 1,
                "hits": [],
                "misses": [(self.TEST_SPEC_A, str(self.TEST_LOCAL_ERROR.err))],
            },
            {
                "cache_name": self.TEST_CACHE_NAME_1,
                "num_hits": 1,
                "num_misses": 1,
                "hits": [(self.TEST_SPEC_B, "")],
                "misses": [(self.TEST_SPEC_C, str(self.TEST_REMOTE_ERROR.err))],
            },
        ]
        expected_hit_or_miss_files = {
            f"{self.TEST_CACHE_NAME_2}.misses":
                f"{self.TEST_SPEC_A} {str(self.TEST_LOCAL_ERROR.err)}\n",
            f"{self.TEST_CACHE_NAME_1}.hits":
                f"{self.TEST_SPEC_B}\n",
            f"{self.TEST_CACHE_NAME_1}.misses":
                f"{self.TEST_SPEC_C} {str(self.TEST_REMOTE_ERROR.err)}\n",
        }
        with self.mock_artifact_cache_stats(
            expected_stats, expected_hit_or_miss_files=expected_hit_or_miss_files
        ) as artifact_cache_stats:
            artifact_cache_stats.add_hits(self.TEST_CACHE_NAME_1, [self.target_b])
            artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_1, [self.target_c],
                                            [self.TEST_REMOTE_ERROR])
            artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_2, [self.target_a],
                                            [self.TEST_LOCAL_ERROR])

    @contextmanager
    def mock_artifact_cache_stats(self, expected_stats, expected_hit_or_miss_files=None):
        """Yield an ArtifactCacheStats over a temp dir and verify it on exit.

        :param expected_stats: stats dicts expected from `get_all()` after the block runs.
        :param expected_hit_or_miss_files: optional mapping of expected hit/miss file names
            to their expected contents; when None, file verification is skipped.
        """
        with temporary_dir() as tmp_dir:
            artifact_cache_stats = ArtifactCacheStats(tmp_dir)
            yield artifact_cache_stats
            self.assertEqual(
                sorted(expected_stats, key=lambda s: s["cache_name"]),
                sorted(artifact_cache_stats.get_all(), key=lambda s: s["cache_name"]),
            )
            # BUGFIX: the original dereferenced expected_hit_or_miss_files unconditionally,
            # raising AttributeError for callers relying on the documented None default.
            if expected_hit_or_miss_files is None:
                return
            self.assertEqual(sorted(list(expected_hit_or_miss_files.keys())),
                             sorted(os.listdir(tmp_dir)))
            for hit_or_miss_file in expected_hit_or_miss_files.keys():
                with open(os.path.join(tmp_dir, hit_or_miss_file), "r") as hit_or_miss_saved:
                    self.assertEqual(expected_hit_or_miss_files[hit_or_miss_file],
                                     hit_or_miss_saved.read())