コード例 #1
0
ファイル: test_retry.py プロジェクト: lessc0de/arvados
 def test_save_result_after_end_is_error(self):
     """save_result() must raise once the retry loop has been iterated to its end."""
     retrier = arv_retry.RetryLoop(0)
     # Run the loop to completion without ever recording a result.
     for _ in retrier:
         pass
     with self.assertRaises(arv_error.AssertionError):
         retrier.save_result(1)
コード例 #2
0
ファイル: test_retry.py プロジェクト: lessc0de/arvados
 def run_loop(self, num_retries, *results, **kwargs):
     """Drive a RetryLoop, saving one of `results` per iteration.

     Once `results` is used up, None is saved on every further
     iteration.  Extra keyword arguments are passed to the RetryLoop
     constructor.  Returns the loop object after iteration stops.
     """
     # Endless stream: the supplied results first, then None forever.
     outcomes = itertools.chain(results, itertools.repeat(None))
     retrier = arv_retry.RetryLoop(num_retries, self.loop_success, **kwargs)
     for _ in retrier:
         # Pull the next outcome only after the loop has granted a try,
         # so the two sequences advance in lockstep.
         retrier.save_result(next(outcomes))
     return retrier
コード例 #3
0
ファイル: test_retry.py プロジェクト: lessc0de/arvados
 def test_save_result_before_start_is_error(self):
     """save_result() must raise when the retry loop has not been iterated yet."""
     retrier = arv_retry.RetryLoop(0)
     with self.assertRaises(arv_error.AssertionError):
         retrier.save_result(1)
コード例 #4
0
ファイル: keep.py プロジェクト: oliveralka/arvados
    def put(self, data, copies=2, num_retries=None):
        """Save data in Keep.

        This method will get a list of Keep services from the API server, and
        send the data to each one simultaneously in a new thread.  Once the
        uploads are finished, if enough copies are saved, this method returns
        the most recent HTTP response body.  If requests fail to upload
        enough copies, this method raises KeepWriteError.

        Arguments:
        * data: The data to upload.  Anything that is not already bytes
          is converted with its encode() method first.
        * copies: The number of copies that the user requires be saved.
          Default 2.  When less than 1, nothing is uploaded and the
          locator string "<md5>+<size>" is returned.
        * num_retries: The number of times to retry PUT requests to
          *each* Keep server if it returns temporary failures, with
          exponential backoff.  The default value is set when the
          KeepClient is initialized.

        Raises arvados.errors.KeepWriteError when the retry loop ends
        without success, either because no Keep services could be mapped
        or because fewer than `copies` copies were written.
        """

        if not isinstance(data, bytes):
            data = data.encode()

        self.put_counter.add(1)

        data_hash = hashlib.md5(data).hexdigest()
        loc_s = data_hash + '+' + str(len(data))
        if copies < 1:
            return loc_s
        locator = KeepLocator(loc_s)

        headers = {}
        # Tell the proxy how many copies we want it to store
        headers['X-Keep-Desired-Replicas'] = str(copies)
        # Initialised up front so the error path below can always read it,
        # even if every map_new_services() call raised.
        sorted_roots = []
        roots_map = {}
        loop = retry.RetryLoop(num_retries,
                               self._check_loop_result,
                               backoff_start=2)
        # Running total of copies written across all attempts.
        done = 0
        for tries_left in loop:
            try:
                sorted_roots = self.map_new_services(
                    roots_map,
                    locator,
                    # Rebuild the service list on every attempt after the first.
                    force_rebuild=(tries_left < num_retries),
                    need_writable=True,
                    **headers)
            except Exception as error:
                loop.save_result(error)
                continue

            # Each attempt only asks for the copies still missing.
            writer_pool = KeepClient.KeepWriterThreadPool(
                data=data,
                data_hash=data_hash,
                copies=copies - done,
                max_service_replicas=self.max_replicas_per_service,
                timeout=self.current_timeout(num_retries - tries_left))
            for service_root, ks in [(root, roots_map[root])
                                     for root in sorted_roots]:
                if ks.finished():
                    continue
                writer_pool.add_task(ks, service_root)
            writer_pool.join()
            done += writer_pool.done()
            loop.save_result((done >= copies, writer_pool.total_task_nr))

        if loop.success():
            return writer_pool.response()
        if not roots_map:
            raise arvados.errors.KeepWriteError(
                "failed to write {}: no Keep services available ({})".format(
                    data_hash, loop.last_result()))
        else:
            service_errors = ((key, roots_map[key].last_result()['error'])
                              for key in sorted_roots
                              if roots_map[key].last_result()['error'])
            # Report the cumulative count: writer_pool.done() would cover
            # only the final attempt and under-report what was written.
            raise arvados.errors.KeepWriteError(
                "failed to write {} (wanted {} copies but wrote {})".format(
                    data_hash, copies, done),
                service_errors,
                label="service")
コード例 #5
0
ファイル: keep.py プロジェクト: oliveralka/arvados
    def _get_or_head(self, loc_s, method="GET", num_retries=None):
        """Get data from Keep.

        This method fetches one or more blocks of data from Keep.  It
        sends a request each Keep service registered with the API
        server (or the proxy provided when this client was
        instantiated), then each service named in location hints, in
        sequence.  As soon as one service provides the data, it's
        returned.

        Arguments:
        * loc_s: A string of one or more comma-separated locators to fetch.
          This method returns the concatenation of these blocks.
        * method: "GET" (the default) or "HEAD".  For "GET" the block
          cache is consulted before, and filled after, the request; on
          success the block is returned.  For "HEAD" the cache is not
          used and True is returned on success.
        * num_retries: The number of times to retry GET requests to
          *each* Keep server if it returns temporary failures, with
          exponential backoff.  Note that, in each loop, the method may try
          to fetch data from every available Keep service, along with any
          that are named in location hints in the locator.  The default value
          is set when the KeepClient is initialized.

        Raises:
        * arvados.errors.NotFoundError when every service tried last
          answered with status 403, 404 or 410.
        * arvados.errors.KeepReadError for every other failure,
          including when no Keep services are available.
        """
        # NOTE(review): the multi-locator path always goes through self.get(),
        # whatever `method` is, and does not forward num_retries -- confirm
        # this is intended.
        if ',' in loc_s:
            return ''.join(self.get(x) for x in loc_s.split(','))

        self.get_counter.add(1)

        locator = KeepLocator(loc_s)
        if method == "GET":
            # `first` is false when the slot was not newly reserved by this
            # call; in that case the slot's value is returned directly.
            slot, first = self.block_cache.reserve_cache(locator.md5sum)
            if not first:
                self.hits_counter.add(1)
                v = slot.get()
                return v

        # Reached for every HEAD request and for GET requests that did not
        # return from the cache above.
        self.misses_counter.add(1)

        # If the locator has hints specifying a prefix (indicating a
        # remote keepproxy) or the UUID of a local gateway service,
        # read data from the indicated service(s) instead of the usual
        # list of local disk services.
        hint_roots = [
            'http://keep.{}.arvadosapi.com/'.format(hint[2:])
            for hint in locator.hints
            if hint.startswith('K@') and len(hint) == 7
        ]
        hint_roots.extend([
            self._gateway_services[hint[2:]]['_service_root']
            for hint in locator.hints
            if (hint.startswith('K@') and len(hint) == 29
                and self._gateway_services.get(hint[2:]))
        ])
        # Map root URLs to their KeepService objects.
        roots_map = {
            root: self.KeepService(root,
                                   self._user_agent_pool,
                                   upload_counter=self.upload_counter,
                                   download_counter=self.download_counter)
            for root in hint_roots
        }

        # See #3147 for a discussion of the loop implementation.  Highlights:
        # * Refresh the list of Keep services after each failure, in case
        #   it's being updated.
        # * Retry until we succeed, we're out of retries, or every available
        #   service has returned permanent failure.
        sorted_roots = []
        # NOTE(review): this rebinding discards the hint-based roots_map built
        # just above, and hint_roots is not read again in this method --
        # presumably map_new_services() resolves hinted services itself;
        # confirm, otherwise the hint handling above has no effect.
        roots_map = {}
        blob = None
        loop = retry.RetryLoop(num_retries,
                               self._check_loop_result,
                               backoff_start=2)
        for tries_left in loop:
            try:
                sorted_roots = self.map_new_services(
                    roots_map,
                    locator,
                    # Rebuild the service list on every attempt after the first.
                    force_rebuild=(tries_left < num_retries),
                    need_writable=False)
            except Exception as error:
                # Record the failure as this attempt's result and try again.
                loop.save_result(error)
                continue

            # Query KeepService objects that haven't returned
            # permanent failure, in our specified shuffle order.
            services_to_try = [
                roots_map[root] for root in sorted_roots
                if roots_map[root].usable()
            ]
            for keep_service in services_to_try:
                blob = keep_service.get(
                    locator,
                    method=method,
                    timeout=self.current_timeout(num_retries - tries_left))
                if blob is not None:
                    break
            loop.save_result((blob, len(services_to_try)))

        # Always cache the result, then return it if we succeeded.
        # NOTE(review): if anything inside the loop raises outside the
        # try/except above (e.g. keep_service.get()), this point is never
        # reached and the reserved cache slot is left unset -- confirm
        # whether that can happen and what readers of the slot then see.
        if method == "GET":
            slot.set(blob)
            self.block_cache.cap_cache()
        if loop.success():
            if method == "HEAD":
                return True
            else:
                return blob

        # Q: Including 403 is necessary for the Keep tests to continue
        # passing, but maybe they should expect KeepReadError instead?
        not_founds = sum(1 for key in sorted_roots
                         if roots_map[key].last_result().get(
                             'status_code', None) in {403, 404, 410})
        # Lazy generator of (service root, error) pairs, handed to the
        # exception constructors below.
        service_errors = ((key, roots_map[key].last_result()['error'])
                          for key in sorted_roots)
        if not roots_map:
            raise arvados.errors.KeepReadError(
                "failed to read {}: no Keep services available ({})".format(
                    loc_s, loop.last_result()))
        elif not_founds == len(sorted_roots):
            raise arvados.errors.NotFoundError("{} not found".format(loc_s),
                                               service_errors)
        else:
            raise arvados.errors.KeepReadError(
                "failed to read {}".format(loc_s),
                service_errors,
                label="service")
コード例 #6
0
ファイル: keep.py プロジェクト: kbronstein/arvados
    def put(self, data, copies=2, num_retries=None):
        """Store a block of data in Keep.

        The list of Keep services is refreshed through map_new_services()
        and one writer thread is started per service that has not finished
        yet.  When enough copies have been saved the most recent response
        body is returned; otherwise KeepWriteError is raised.

        Arguments:
        * data: The string of data to upload.
        * copies: How many copies the caller requires.  Default 2.  When
          less than 1, nothing is uploaded and the MD5 hex digest of the
          data is returned.
        * num_retries: The number of times to retry PUT requests to
          *each* Keep server if it returns temporary failures, with
          exponential backoff.  The default value is set when the
          KeepClient is initialized.
        """
        data_hash = hashlib.md5(data).hexdigest()
        if copies < 1:
            return data_hash

        headers = {}
        if self.using_proxy:
            # A proxy has to be told how many copies it should store.
            headers['X-Keep-Desired-Replication'] = str(copies)
        roots_map = {}
        thread_limiter = KeepClient.ThreadLimiter(copies)
        loop = retry.RetryLoop(
            num_retries, self._check_loop_result, backoff_start=2)
        for tries_left in loop:
            try:
                # Refreshes roots_map in place; the returned root list is
                # not needed here.
                self.map_new_services(
                    roots_map,
                    data_hash,
                    force_rebuild=(tries_left < num_retries),
                    **headers)
            except Exception as error:
                loop.save_result(error)
                continue

            # Start one writer per unfinished service, then wait for all.
            writers = []
            for service_root, ks in roots_map.iteritems():
                if not ks.finished():
                    writer = KeepClient.KeepWriterThread(
                        ks,
                        data=data,
                        data_hash=data_hash,
                        service_root=service_root,
                        thread_limiter=thread_limiter,
                        timeout=self.timeout)
                    writer.start()
                    writers.append(writer)
            for writer in writers:
                writer.join()
            enough_copies = thread_limiter.done() >= copies
            loop.save_result((enough_copies, len(writers)))

        if not loop.success():
            raise arvados.errors.KeepWriteError(
                "Write fail for %s: wanted %d but wrote %d" %
                (data_hash, copies, thread_limiter.done()))
        return thread_limiter.response()
コード例 #7
0
ファイル: keep.py プロジェクト: kbronstein/arvados
    def get(self, loc_s, num_retries=None):
        """Fetch block data from Keep.

        Requests go to the Keep services returned by map_new_services(),
        followed by any services named in the locator's 'K@' hints, in
        that order.  The first non-None response is cached and returned.

        Arguments:
        * loc_s: A string of one or more comma-separated locators to fetch.
          With several locators the blocks are fetched one by one and
          returned concatenated.
        * num_retries: The number of times to retry GET requests to
          *each* Keep server if it returns temporary failures, with
          exponential backoff.  Note that, in each loop, the method may try
          to fetch data from every available Keep service, along with any
          that are named in location hints in the locator.  The default value
          is set when the KeepClient is initialized.

        Raises arvados.errors.NotFoundError when at least 75% of the known
        services last answered 403, 404 or 410, and
        arvados.errors.KeepReadError for any other failure.
        """
        if ',' in loc_s:
            return ''.join(self.get(piece) for piece in loc_s.split(','))
        locator = KeepLocator(loc_s)
        expect_hash = locator.md5sum

        slot, first = self.block_cache.reserve_cache(expect_hash)
        if not first:
            # Not the first reservation for this hash: hand back whatever
            # the slot yields.
            return slot.get()

        # See #3147 for a discussion of the loop implementation.  Highlights:
        # * Refresh the list of Keep services after each failure, in case
        #   it's being updated.
        # * Retry until we succeed, we're out of retries, or every available
        #   service has returned permanent failure.
        hint_roots = []
        for hint in locator.hints:
            if hint.startswith('K@'):
                hint_roots.append(
                    'http://keep.{}.arvadosapi.com/'.format(hint[2:]))
        # Root URL -> KeepService object, seeded with the hinted services.
        roots_map = dict((root, self.KeepService(root)) for root in hint_roots)
        blob = None
        loop = retry.RetryLoop(
            num_retries, self._check_loop_result, backoff_start=2)
        for tries_left in loop:
            try:
                local_roots = self.map_new_services(
                    roots_map,
                    expect_hash,
                    force_rebuild=(tries_left < num_retries))
            except Exception as error:
                loop.save_result(error)
                continue

            # Keep only the services that have not failed permanently,
            # local services first, then hinted ones.
            candidates = []
            for root in local_roots + hint_roots:
                service = roots_map[root]
                if service.usable():
                    candidates.append(service)
            http = httplib2.Http(timeout=self.timeout)
            for keep_service in candidates:
                blob = keep_service.get(http, locator)
                if blob is not None:
                    break
            loop.save_result((blob, len(candidates)))

        # The result is cached whether or not the fetch succeeded.
        slot.set(blob)
        self.block_cache.cap_cache()
        if loop.success():
            return blob

        # No servers fulfilled the request.  Count how many responded
        # "not found;" if the ratio is high enough (currently 75%), report
        # Not Found; otherwise a generic error.
        # Q: Including 403 is necessary for the Keep tests to continue
        # passing, but maybe they should expect KeepReadError instead?
        not_found_codes = set([403, 404, 410])
        not_founds = len([ks for ks in roots_map.values()
                          if ks.last_status() in not_found_codes])
        if roots_map and ((float(not_founds) / len(roots_map)) >= .75):
            raise arvados.errors.NotFoundError(loc_s)
        raise arvados.errors.KeepReadError(loc_s)
コード例 #8
0
    def put(self, data, copies=2, num_retries=None):
        """Store a block of data in Keep.

        The list of Keep services is refreshed through map_new_services()
        and one writer thread is started per service that has not finished
        yet.  When enough copies have been saved the most recent response
        body is returned; otherwise KeepWriteError is raised.

        Arguments:
        * data: The string of data to upload.  A unicode value is encoded
          as ASCII; any other non-str value raises ArgumentError.
        * copies: How many copies the caller requires.  Default 2.  When
          less than 1, nothing is uploaded and the MD5 hex digest of the
          data is returned.
        * num_retries: The number of times to retry PUT requests to
          *each* Keep server if it returns temporary failures, with
          exponential backoff.  The default value is set when the
          KeepClient is initialized.
        """

        # Reject anything that is neither unicode nor str before encoding.
        if not isinstance(data, (unicode, str)):
            raise arvados.errors.ArgumentError("Argument 'data' to KeepClient.put must be type 'str'")
        if isinstance(data, unicode):
            data = data.encode("ascii")

        data_hash = hashlib.md5(data).hexdigest()
        if copies < 1:
            return data_hash
        locator = KeepLocator(data_hash + '+' + str(len(data)))

        headers = {}
        if self.using_proxy:
            # A proxy has to be told how many copies it should store.
            headers['X-Keep-Desired-Replication'] = str(copies)
        roots_map = {}
        thread_limiter = KeepClient.ThreadLimiter(copies)
        loop = retry.RetryLoop(
            num_retries,
            self._check_loop_result,
            backoff_start=2)
        for tries_left in loop:
            try:
                local_roots = self.map_new_services(
                    roots_map,
                    locator,
                    force_rebuild=(tries_left < num_retries),
                    **headers)
            except Exception as error:
                loop.save_result(error)
                continue

            # Start one writer per unfinished service, then wait for all.
            writers = []
            for service_root, ks in roots_map.iteritems():
                if not ks.finished():
                    writer = KeepClient.KeepWriterThread(
                        ks,
                        data=data,
                        data_hash=data_hash,
                        service_root=service_root,
                        thread_limiter=thread_limiter,
                        timeout=self.current_timeout(num_retries-tries_left))
                    writer.start()
                    writers.append(writer)
            for writer in writers:
                writer.join()
            enough_copies = thread_limiter.done() >= copies
            loop.save_result((enough_copies, len(writers)))

        if loop.success():
            return thread_limiter.response()
        if not roots_map:
            raise arvados.errors.KeepWriteError(
                "failed to write {}: no Keep services available ({})".format(
                    data_hash, loop.last_result()))
        # Lazy (service root, last result) pairs for the services that did
        # not succeed, passed on to the exception.
        service_errors = ((key, roots_map[key].last_result)
                          for key in local_roots
                          if not roots_map[key].success_flag)
        raise arvados.errors.KeepWriteError(
            "failed to write {} (wanted {} copies but wrote {})".format(
                data_hash, copies, thread_limiter.done()),
            service_errors,
            label="service")