def __init__(self, cache_dir, labels, is_cache_complete=None, coder=None): if not coder: coder = SafeFastPrimitivesCoder() if not is_cache_complete: is_cache_complete = lambda _: True self._cache_dir = cache_dir self._coder = coder self._labels = labels self._path = os.path.join(self._cache_dir, *self._labels) self._is_cache_complete = is_cache_complete self._pipeline_id = CacheKey.from_str(labels[-1]).pipeline_id
def _wait_until_file_exists(self, timeout_secs=30): """Blocks until the file exists for a maximum of timeout_secs. """ # Wait for up to `timeout_secs` for the file to be available. start = time.time() while not os.path.exists(self._path): time.sleep(1) if time.time() - start > timeout_secs: pcollection_var = CacheKey.from_str(self._labels[-1]).var raise RuntimeError( 'Timed out waiting for cache file for PCollection `{}` to be ' 'available with path {}.'.format(pcollection_var, self._path)) return open(self._path, mode='rb')