Example #1
    def use_cached_files(self, cache_key, results_dir=None):
        if self._localcache.has(cache_key):
            return self._localcache.use_cached_files(cache_key, results_dir)

        queue = multiprocessing.Queue()
        try:
            response = self._request('GET', cache_key)
            if response is not None:
                threading.Thread(
                    target=_log_if_no_response,
                    args=(
                        60,
                        "\nStill downloading artifacts (either they're very large or the connection to the cache is slow)",
                        queue.get,
                    )).start()
                # Delegate storage and extraction to local cache
                byte_iter = response.iter_content(self.READ_SIZE_BYTES)
                res = self._localcache.store_and_use_artifact(
                    cache_key, byte_iter, results_dir)
                queue.put(None)
                return res
        except Exception as e:
            logger.warning(
                '\nError while reading from remote artifact cache: {0}\n'
                .format(e))
            queue.put(None)
            # TODO(peiyu): clean up partially downloaded local file if any
            return UnreadableArtifact(cache_key, e)

        return False
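
Examples 1 and 7 rely on a module-level _log_if_no_response helper that is not shown. The comment in Example 7 explains the pattern: the queue acts as a semaphore, and a background thread warns if the single None element does not arrive within the timeout. A minimal sketch consistent with that (the name and call signature come from the examples; the body is an assumption):

import logging
import queue as queue_module

logger = logging.getLogger(__name__)

def _log_if_no_response(timeout_seconds, message, getter):
    # `getter` is the bound `get` of the download's semaphore queue. The
    # downloader puts a single None into the queue when it finishes; if
    # that element does not arrive within the timeout, warn the user.
    try:
        getter(block=True, timeout=timeout_seconds)
    except queue_module.Empty:
        logger.warning(message)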
Example #2
    def use_cached_files(self, cache_key):
        try:
            tarfile = self._cache_file_for_key(cache_key)
            if os.path.exists(tarfile):
                self._artifact(tarfile).extract()
                return True
        except Exception as e:
            # TODO(davidt): Consider being more granular in what is caught.
            logger.warning(
                'Error while reading from local artifact cache: {0}'.format(e))
            return UnreadableArtifact(cache_key, e)

        return False
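
Every example returns UnreadableArtifact(cache_key, e) when a read fails, and callers (see the stats test in Example 9) treat that value as a miss while still reading its .err attribute. For that to work the sentinel must be falsy; a minimal sketch consistent with this usage (an assumption, not the verbatim pants definition):

class UnreadableArtifact:
    """A false-y value carrying the details of a failed cache read."""

    def __init__(self, key, err=None):
        self.key = key  # the cache key whose artifact could not be read
        self.err = err  # the underlying exception, if any

    def __bool__(self):
        # use_cached_files() callers treat any non-True result as a miss,
        # so this sentinel must evaluate to False.
        return False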
Example #3
    def use_cached_files(self, cache_key, hit_callback=None):
        try:
            artifact = self._artifact_for(cache_key)
            if artifact.exists():
                if hit_callback:
                    hit_callback(cache_key)
                artifact.extract()
                return True
        except Exception as e:
            # TODO(davidt): Consider being more granular in what is caught.
            logger.warning(
                'Error while reading from local artifact cache: {0}'.format(e))
            return UnreadableArtifact(cache_key, e)

        return False
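
The hit_callback parameter in Example 3 lets a caller observe a hit just before the artifact is extracted, for instance to feed cache statistics. A hypothetical usage (cache and some_cache_key are illustrative):

hits = []

def record_hit(cache_key):
    # Invoked once per key found in the local cache, before extraction.
    hits.append(cache_key)

# `cache` is any local artifact cache exposing use_cached_files().
result = cache.use_cached_files(some_cache_key, hit_callback=record_hit)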
Example #4
  def use_cached_files(self, cache_key):
    if self._localcache.has(cache_key):
      return self._localcache.use_cached_files(cache_key)

    remote_path = self._remote_path_for_key(cache_key)
    try:
      response = self._request('GET', remote_path)
      if response is not None:
        # Delegate storage and extraction to local cache
        byte_iter = response.iter_content(self.READ_SIZE_BYTES)
        return self._localcache.store_and_use_artifact(cache_key, byte_iter)
    except Exception as e:
      logger.warning('\nError while reading from remote artifact cache: {0}\n'.format(e))
      return UnreadableArtifact(cache_key, e)

    return False
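
Examples 1, 4, 5, and 7 stream the HTTP response straight into the local cache via store_and_use_artifact, so the remote cache never writes to disk itself. A minimal sketch of what that delegation implies, reusing the _cache_file_for_key helper from Examples 2 and 6 (the body is an assumption; a real implementation would write to a temp file first so partial downloads are never promoted):

def store_and_use_artifact(self, cache_key, src, results_dir=None):
    # Write the streamed chunks to the local tarball for this key, then
    # fall through to the normal local extraction path.
    tarball = self._cache_file_for_key(cache_key)
    with open(tarball, 'wb') as outfile:
        for chunk in src:
            outfile.write(chunk)
    return self.use_cached_files(cache_key, results_dir)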
Example #5
  def use_cached_files(self, cache_key, results_dir=None):
    if self._localcache.has(cache_key):
      return self._localcache.use_cached_files(cache_key, results_dir)

    try:
      response = self._request('GET', cache_key)
      if response is not None:
        # Delegate storage and extraction to local cache
        byte_iter = response.iter_content(self.READ_SIZE_BYTES)
        return self._localcache.store_and_use_artifact(cache_key, byte_iter, results_dir)
    except Exception as e:
      logger.warning('\nError while reading from remote artifact cache: {0}\n'.format(e))
      # TODO(peiyu): clean up partially downloaded local file if any
      return UnreadableArtifact(cache_key, e)

    return False
Example #6
  def use_cached_files(self, cache_key, results_dir=None):
    tarfile = self._cache_file_for_key(cache_key)
    try:
      artifact = self._artifact_for(cache_key)
      if artifact.exists():
        if results_dir is not None:
          safe_rmtree(results_dir)
        artifact.extract()
        return True
    except Exception as e:
      # TODO(davidt): Consider being more granular in what is caught.
      logger.warning('Error while reading {0} from local artifact cache: {1}'.format(tarfile, e))
      safe_delete(tarfile)
      return UnreadableArtifact(cache_key, e)

    return False
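
Example 6 adds two recovery behaviors to the local read: a provided results_dir is wiped before extraction so stale outputs cannot survive, and a corrupt tarball is deleted so the next read does not trip over it again. safe_rmtree and safe_delete are pants dirutil helpers; minimal sketches of the behavior relied on here (assumed ignore-if-missing variants of the shutil/os calls):

import os
import shutil

def safe_rmtree(directory):
    # Remove a directory tree, tolerating its absence.
    shutil.rmtree(directory, ignore_errors=True)

def safe_delete(filename):
    # Remove a file, tolerating its absence.
    try:
        os.unlink(filename)
    except FileNotFoundError:
        pass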
Example #7
    def use_cached_files(self, cache_key, results_dir=None):
        if self._localcache.has(cache_key):
            return self._localcache.use_cached_files(cache_key, results_dir)

        # The queue is used as a semaphore here, containing only a single None element. A background
        # thread is kicked off which waits with the specified timeout for the single queue element, and
        # prints a warning message if the timeout is breached.
        queue = multiprocessing.Queue()
        try:
            response = self._request("GET", cache_key)
            if response is not None:
                threading.Thread(
                    target=_log_if_no_response,
                    args=(
                        RequestsSession._instance().
                        slow_download_timeout_seconds,
                        "\nStill downloading artifacts (either they're very large or the connection to the cache is slow)",
                        queue.get,
                    ),
                ).start()
                # Delegate storage and extraction to local cache
                byte_iter = response.iter_content(self.READ_SIZE_BYTES)
                res = self._localcache.store_and_use_artifact(
                    cache_key, byte_iter, results_dir)
                queue.put(None)
                return res
        except Exception as e:
            logger.warning(
                "\nError while reading from remote artifact cache: {0}\n".
                format(e))
            queue.put(None)
            # If we exceed the retry limits, set a global flag to avoid using the cache for the rest of
            # the pants process lifetime.
            if isinstance(e, MaxRetryError):
                logger.warning(
                    "\nMaximum retries were exceeded for the current connection pool. Avoiding "
                    "the remote cache for the rest of the pants process lifetime.\n"
                )
                RequestsSession._max_retries_exceeded = True
            # TODO(peiyu): clean up partially downloaded local file if any
            return UnreadableArtifact(cache_key, e)

        return False
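
Example 7 also sets RequestsSession._max_retries_exceeded after a MaxRetryError so later requests can short-circuit. A plausible guard inside _request (hypothetical; the real check lives in the pants RequestsSession plumbing):

def _request(self, method, cache_key):
    # Hypothetical guard: once the retry budget has been exhausted, treat
    # every remote lookup as a permanent miss instead of retrying forever.
    if RequestsSession._max_retries_exceeded:
        return None
    # ... perform the actual HTTP request against the remote cache here ...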
Example #8
    def use_cached_files(self, cache_key, results_dir=None):
        logger.debug('GET {0}'.format(cache_key))
        if self._localcache.has(cache_key):
            return self._localcache.use_cached_files(cache_key, results_dir)

        s3_object = self._get_object(cache_key)
        try:
            get_result = s3_object.get()
        except Exception as e:
            _log_and_classify_error(e, 'GET', cache_key)
            return False

        # Delegate storage and extraction to local cache
        try:
            return self._localcache.store_and_use_artifact(
                cache_key, iter_content(get_result['Body']), results_dir)
        except Exception as e:
            result = _log_and_classify_error(e, 'GET', cache_key)
            if result == _UNKNOWN:
                return UnreadableArtifact(cache_key, e)
            return False
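
Example 8 adapts the boto3 streaming body in get_result['Body'] into the chunk iterator that store_and_use_artifact expects. A minimal sketch of that iter_content helper (the helper itself is assumed; StreamingBody.read(amt) is the standard boto3 API):

def iter_content(body, chunk_size=4 * 1024 * 1024):  # chunk size assumed
    # Yield the S3 object body in fixed-size chunks until EOF.
    while True:
        chunk = body.read(chunk_size)
        if not chunk:
            return
        yield chunk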
Example #9
class ArtifactCacheStatsTest(TestBase):
    TEST_CACHE_NAME_1 = 'ZincCompile'
    TEST_CACHE_NAME_2 = 'Checkstyle_test_checkstyle'
    TEST_LOCAL_ERROR = UnreadableArtifact('foo',
                                          ArtifactError('CRC check failed'))
    TEST_REMOTE_ERROR = UnreadableArtifact(
        'bar',
        NonfatalArtifactCacheError(
            requests.exceptions.ConnectionError('Read time out')))
    TEST_SPEC_A = 'src/scala/a'
    TEST_SPEC_B = 'src/scala/b'
    TEST_SPEC_C = 'src/java/c'

    def setUp(self):
        super().setUp()

        self.target_a = self.make_target(spec=self.TEST_SPEC_A)
        self.target_b = self.make_target(spec=self.TEST_SPEC_B)
        self.target_c = self.make_target(spec=self.TEST_SPEC_C)

    def test_add_hits(self):
        expected_stats = [
            {
                'cache_name': self.TEST_CACHE_NAME_2,
                'num_hits': 0,
                'num_misses': 1,
                'hits': [],
                'misses': [(self.TEST_SPEC_A, str(self.TEST_LOCAL_ERROR.err))]
            },
            {
                'cache_name': self.TEST_CACHE_NAME_1,
                'num_hits': 1,
                'num_misses': 1,
                'hits': [(self.TEST_SPEC_B, '')],
                'misses': [(self.TEST_SPEC_C, str(self.TEST_REMOTE_ERROR.err))]
            },
        ]

        expected_hit_or_miss_files = {
            '{}.misses'.format(self.TEST_CACHE_NAME_2):
            '{} {}\n'.format(self.TEST_SPEC_A, str(self.TEST_LOCAL_ERROR.err)),
            '{}.hits'.format(self.TEST_CACHE_NAME_1):
            '{}\n'.format(self.TEST_SPEC_B),
            '{}.misses'.format(self.TEST_CACHE_NAME_1):
            '{} {}\n'.format(self.TEST_SPEC_C,
                             str(self.TEST_REMOTE_ERROR.err)),
        }

        with self.mock_artifact_cache_stats(
                expected_stats,
                expected_hit_or_miss_files=expected_hit_or_miss_files
        ) as artifact_cache_stats:
            artifact_cache_stats.add_hits(self.TEST_CACHE_NAME_1,
                                          [self.target_b])
            artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_1,
                                            [self.target_c],
                                            [self.TEST_REMOTE_ERROR])
            artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_2,
                                            [self.target_a],
                                            [self.TEST_LOCAL_ERROR])

    @contextmanager
    def mock_artifact_cache_stats(self,
                                  expected_stats,
                                  expected_hit_or_miss_files=None):
        with temporary_dir() as tmp_dir:
            artifact_cache_stats = ArtifactCacheStats(tmp_dir)
            yield artifact_cache_stats
            self.assertEqual(
                sorted(expected_stats, key=lambda s: s['cache_name']),
                sorted(artifact_cache_stats.get_all(),
                       key=lambda s: s['cache_name']))

            self.assertEqual(sorted(list(expected_hit_or_miss_files.keys())),
                             sorted(os.listdir(tmp_dir)))
            for hit_or_miss_file in expected_hit_or_miss_files.keys():
                with open(os.path.join(tmp_dir, hit_or_miss_file),
                          'r') as hit_or_miss_saved:
                    self.assertEqual(
                        expected_hit_or_miss_files[hit_or_miss_file],
                        hit_or_miss_saved.read())
Example #10
class ArtifactCacheStatsTest(TestBase):
    TEST_CACHE_NAME_1 = "ZincCompile"
    TEST_CACHE_NAME_2 = "Checkstyle_test_checkstyle"
    TEST_LOCAL_ERROR = UnreadableArtifact("foo",
                                          ArtifactError("CRC check failed"))
    TEST_REMOTE_ERROR = UnreadableArtifact(
        "bar",
        NonfatalArtifactCacheError(
            requests.exceptions.ConnectionError("Read time out")))
    TEST_SPEC_A = "src/scala/a"
    TEST_SPEC_B = "src/scala/b"
    TEST_SPEC_C = "src/java/c"

    def setUp(self):
        super().setUp()

        self.target_a = self.make_target(spec=self.TEST_SPEC_A)
        self.target_b = self.make_target(spec=self.TEST_SPEC_B)
        self.target_c = self.make_target(spec=self.TEST_SPEC_C)

    def test_add_hits(self):
        expected_stats = [
            {
                "cache_name": self.TEST_CACHE_NAME_2,
                "num_hits": 0,
                "num_misses": 1,
                "hits": [],
                "misses": [(self.TEST_SPEC_A, str(self.TEST_LOCAL_ERROR.err))],
            },
            {
                "cache_name": self.TEST_CACHE_NAME_1,
                "num_hits": 1,
                "num_misses": 1,
                "hits": [(self.TEST_SPEC_B, "")],
                "misses":
                [(self.TEST_SPEC_C, str(self.TEST_REMOTE_ERROR.err))],
            },
        ]

        expected_hit_or_miss_files = {
            f"{self.TEST_CACHE_NAME_2}.misses":
            f"{self.TEST_SPEC_A} {str(self.TEST_LOCAL_ERROR.err)}\n",
            f"{self.TEST_CACHE_NAME_1}.hits":
            f"{self.TEST_SPEC_B}\n",
            f"{self.TEST_CACHE_NAME_1}.misses":
            f"{self.TEST_SPEC_C} {str(self.TEST_REMOTE_ERROR.err)}\n",
        }

        with self.mock_artifact_cache_stats(
                expected_stats,
                expected_hit_or_miss_files=expected_hit_or_miss_files
        ) as artifact_cache_stats:
            artifact_cache_stats.add_hits(self.TEST_CACHE_NAME_1,
                                          [self.target_b])
            artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_1,
                                            [self.target_c],
                                            [self.TEST_REMOTE_ERROR])
            artifact_cache_stats.add_misses(self.TEST_CACHE_NAME_2,
                                            [self.target_a],
                                            [self.TEST_LOCAL_ERROR])

    @contextmanager
    def mock_artifact_cache_stats(self,
                                  expected_stats,
                                  expected_hit_or_miss_files=None):
        with temporary_dir() as tmp_dir:
            artifact_cache_stats = ArtifactCacheStats(tmp_dir)
            yield artifact_cache_stats
            self.assertEqual(
                sorted(expected_stats, key=lambda s: s["cache_name"]),
                sorted(artifact_cache_stats.get_all(),
                       key=lambda s: s["cache_name"]),
            )

            self.assertEqual(sorted(list(expected_hit_or_miss_files.keys())),
                             sorted(os.listdir(tmp_dir)))
            for hit_or_miss_file in expected_hit_or_miss_files.keys():
                with open(os.path.join(tmp_dir, hit_or_miss_file),
                          "r") as hit_or_miss_saved:
                    self.assertEqual(
                        expected_hit_or_miss_files[hit_or_miss_file],
                        hit_or_miss_saved.read())
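
Examples 9 and 10 are the same test before and after a quoting and f-string cleanup; together they pin down the ArtifactCacheStats contract: per-cache hit/miss tallies from get_all(), plus append-only <cache_name>.hits and <cache_name>.misses files under the stats directory. A minimal implementation reconstructed from those expectations (an assumption, not the real pants class; target.address.spec is assumed as the spec accessor):

import os
from collections import defaultdict

class ArtifactCacheStats:
    """Tracks artifact cache hits and misses per cache name (sketch)."""

    def __init__(self, stats_dir):
        self._stats_dir = stats_dir
        self._stats = defaultdict(lambda: {'hits': [], 'misses': []})

    def add_hits(self, cache_name, targets):
        for target in targets:
            self._record(cache_name, 'hits', target.address.spec, '')

    def add_misses(self, cache_name, targets, causes=None):
        causes = causes or [None] * len(targets)
        for target, cause in zip(targets, causes):
            cause_str = str(cause.err) if cause is not None else ''
            self._record(cache_name, 'misses', target.address.spec, cause_str)

    def _record(self, cache_name, kind, spec, cause):
        self._stats[cache_name][kind].append((spec, cause))
        # Hits are logged as "<spec>", misses as "<spec> <cause>".
        line = '{} {}\n'.format(spec, cause) if cause else '{}\n'.format(spec)
        path = os.path.join(self._stats_dir, '{}.{}'.format(cache_name, kind))
        with open(path, 'a') as stats_file:
            stats_file.write(line)

    def get_all(self):
        return [{'cache_name': name,
                 'num_hits': len(stat['hits']),
                 'num_misses': len(stat['misses']),
                 'hits': stat['hits'],
                 'misses': stat['misses']}
                for name, stat in self._stats.items()]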