def check_resource(resource, get_method=None, query_string='', wait_time=1,
                   retries=None, raise_on_error=False, api=None):
    """Waits until a resource is finished.

       Given a resource and its corresponding get_method (if absent, the
       generic get_resource is used), it calls the get_method on the
       resource with the given query_string and waits with sleeping
       intervals of wait_time until the resource is in a final state
       (either FINISHED or FAULTY). The number of retries can be limited
       using the retries parameter.
    """
    # get_resource_id handles both id strings and resource dicts
    resource_id = get_resource_id(resource)
    if resource_id is None:
        raise ValueError("Failed to extract a valid resource id to check.")
    kwargs = {'query_string': query_string}

    if get_method is None and hasattr(api, 'get_resource'):
        get_method = api.get_resource
    elif get_method is None:
        raise ValueError("You must supply either the get_method or the api"
                         " connection info to retrieve the resource")
    if isinstance(resource, str):
        resource = get_method(resource, **kwargs)
    counter = 0
    while retries is None or counter < retries:
        counter += 1
        status = get_status(resource)
        code = status['code']
        if code == c.FINISHED:
            if counter > 1:
                # final get call to retrieve the complete resource
                resource = get_method(resource, **kwargs)
            if raise_on_error:
                exception_on_error(resource)
            return resource
        elif code == c.FAULTY:
            raise ValueError(status)
        time.sleep(get_exponential_wait(wait_time, counter))
    # retries for the finished status use a query string that gets the
    # minimal available resource
    if kwargs.get('query_string') is not None:
        tiny_kwargs = {'query_string': c.TINY_RESOURCE}
    else:
        tiny_kwargs = {}
    resource = get_method(resource, **tiny_kwargs)
    if raise_on_error:
        exception_on_error(resource)
    return resource
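# `get_exponential_wait` is used throughout this section but not defined in
# it. Below is a minimal sketch of what such a backoff helper might look
# like (an illustrative assumption, not the library's actual code): the seed
# wait_time grows exponentially with the retry count, with random jitter so
# that concurrent clients do not poll in lockstep.
import random

def get_exponential_wait_sketch(wait_time, retry_count):
    """Hypothetical backoff helper: exponential growth plus jitter."""
    return wait_time * (2 ** retry_count) * (0.5 + random.random())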
def logged_wait(start, delta, count, res_description):
    """Compares the elapsed time to the expected delta and waits for
       the next sleep period.
    """
    wait_time = min(get_exponential_wait(delta / 100.0, count), delta)
    print("Sleeping %s" % wait_time)
    time.sleep(wait_time)
    elapsed = (datetime.datetime.utcnow() - start).seconds
    if elapsed > delta / 2.0:
        print("%s seconds waiting for %s" %
              (elapsed, res_description))
    assert_less(elapsed, delta)
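# Hedged usage sketch for `logged_wait`: a polling loop bounded by an
# expected `delta` in seconds. The `is_finished` predicate is a hypothetical
# stand-in for a real status check (e.g. one built on get_status above).
import datetime

def wait_until_finished_sketch(is_finished, delta=60,
                               res_description="resource"):
    start = datetime.datetime.utcnow()
    count = 0
    while not is_finished():
        count += 1
        logged_wait(start, delta, count, res_description)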
def check_resource(resource, get_method=None, query_string='', wait_time=1,
                   retries=None, raise_on_error=False,
                   max_elapsed_estimate=float('inf'), api=None,
                   debug=False):
    """Waits until a resource is finished.

       Given a resource and its corresponding get_method (if absent, the
       generic get_resource is used), it calls the get_method on the
       resource with the given query_string and waits with sleeping
       intervals of wait_time until the resource is in a final state
       (either FINISHED or FAULTY). The number of retries can be limited
       using the retries parameter.
    """
    resource_id = get_resource_id(resource)
    # ephemeral predictions
    if isinstance(resource, dict) and resource.get("resource") is None:
        return resource
    if resource_id is None:
        raise ValueError("Failed to extract a valid resource id to check.")
    if wait_time <= 0:
        raise ValueError("The time to wait needs to be positive.")
    debug = debug or (api is not None and (api.debug or api.short_debug))
    if debug:
        print("Checking resource: %s" % resource_id)
    kwargs = {'query_string': query_string}

    if get_method is None and hasattr(api, 'get_resource'):
        get_method = api.get_resource
    elif get_method is None:
        raise ValueError("You must supply either the get_method or the api"
                         " connection info to retrieve the resource")
    if isinstance(resource, str):
        if debug:
            print("Getting resource %s" % resource_id)
        resource = get_method(resource_id, **kwargs)
    counter = 0
    elapsed = 0
    while retries is None or counter < retries:
        counter += 1
        status = get_status(resource)
        code = status['code']
        if debug:
            print("The resource has status code: %s" % code)
        if code == c.FINISHED:
            if counter > 1:
                if debug:
                    print("Getting resource %s with args %s" %
                          (resource_id, kwargs))
                # final get call to retrieve the complete resource
                resource = get_method(resource, **kwargs)
            if raise_on_error:
                exception_on_error(resource)
            return resource
        if code == c.FAULTY:
            if raise_on_error:
                exception_on_error(resource)
            return resource
        _wait_time = get_exponential_wait(wait_time, counter)
        _max_wait = max_elapsed_estimate - _wait_time
        _wait_time = min(_max_wait, _wait_time)
        if _wait_time <= 0:
            # when the max_elapsed_estimate time is met, we still wait for
            # the resource to be finished, but we restart all counters and
            # the exponentially growing wait is reinitialized
            _wait_time = wait_time
            counter = 0
            elapsed = 0
        if debug:
            print("Sleeping %s" % _wait_time)
        time.sleep(_wait_time)
        elapsed += _wait_time
    # retries for the finished status use a query string that gets the
    # minimal available resource
    if kwargs.get('query_string') is not None:
        tiny_kwargs = {'query_string': c.TINY_RESOURCE}
    else:
        tiny_kwargs = {}
    if debug:
        print("Getting only status for resource %s" % resource_id)
    resource = get_method(resource, **tiny_kwargs)
    if raise_on_error:
        exception_on_error(resource)
    return resource
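# Hedged usage sketch for `check_resource` (not part of the module itself):
# polls a newly created source until it reaches FINISHED. It assumes a BigML
# connection object exposing `create_source` and `get_source`, as the
# bindings' `BigML` class does; the CSV path is a placeholder.
def _check_resource_usage_sketch():
    from bigml.api import BigML
    api = BigML()
    source = api.create_source("data/iris.csv")
    return check_resource(source, api.get_source,
                          wait_time=2, retries=10, raise_on_error=True)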
def _download(self, url, filename=None, wait_time=10, retries=10,
              counter=0):
    """Retrieves a remote file.

       Uses HTTP GET to download a file object with a BigML `url`.
    """
    code = HTTP_INTERNAL_SERVER_ERROR
    file_object = None
    # if retries for the creation and download have been exhausted,
    # return None
    if counter > 2 * retries:
        LOGGER.error("Retries exhausted trying to download the file.")
        return file_object
    if GAE_ENABLED:
        try:
            req_options = {
                'url': self._add_credentials(url),
                'method': urlfetch.GET,
                'validate_certificate': self.verify
            }
            response = urlfetch.fetch(**req_options)
        except urlfetch.Error as exception:
            LOGGER.error("HTTP request error: %s", str(exception))
            return file_object
    else:
        try:
            response = requests.get(self._add_credentials(url),
                                    verify=self.verify, stream=True)
        except (requests.ConnectionError,
                requests.Timeout,
                requests.RequestException) as exc:
            LOGGER.error("HTTP request error: %s", str(exc))
            return file_object
    try:
        code = response.status_code
        if code == HTTP_OK:
            # starting the dataset export procedure
            if response.headers.get("content-type") == JSON_TYPE:
                try:
                    if counter < retries:
                        download_status = json_load(response.content)
                        if download_status and isinstance(
                                download_status, dict):
                            if download_status['status']['code'] != 5:
                                # the export is not finished yet: wait and
                                # poll again
                                time.sleep(get_exponential_wait(
                                    wait_time, counter))
                                counter += 1
                                return self._download(
                                    url, filename=filename,
                                    wait_time=wait_time,
                                    retries=retries, counter=counter)
                            # export finished: skip the JSON check on the
                            # next call by setting counter past retries
                            return self._download(
                                url, filename=filename,
                                wait_time=wait_time, retries=retries,
                                counter=retries + 1)
                    elif counter == retries:
                        LOGGER.error("The maximum number of retries for"
                                     " the download has been exceeded."
                                     " You can retry your command again"
                                     " in a while.")
                        return None
                except ValueError:
                    LOGGER.error("Failed getting a valid JSON structure.")
            else:
                # When the download starts, the content-type is no longer
                # a JSON object.
                if filename is not None and GAE_ENABLED:
                    LOGGER.error("No support for downloading"
                                 " to local files in Google App Engine.")
                    filename = None
                if filename is None:
                    if GAE_ENABLED:
                        file_object = io.StringIO(response.content)
                    else:
                        file_object = response.raw
                else:
                    try:
                        total_size = int(
                            response.headers.get("content-length"))
                    except (TypeError, ValueError):
                        # the header may be missing or non-numeric
                        total_size = None
                    file_size = stream_copy(response, filename)
                    if file_size == 0:
                        LOGGER.error("Error copying file to %s", filename)
                    else:
                        file_object = filename
                    # if transient connection errors prevent the download,
                    # retry
                    if total_size is None or file_size < total_size:
                        LOGGER.error("Error downloading: "
                                     "total size=%s, %s downloaded",
                                     total_size, file_size)
                        time.sleep(get_exponential_wait(wait_time,
                                                        counter))
                        return self._download(url, filename=filename,
                                              wait_time=wait_time,
                                              retries=retries,
                                              counter=counter + 1)
        elif code in [HTTP_BAD_REQUEST,
                      HTTP_UNAUTHORIZED,
                      HTTP_NOT_FOUND,
                      HTTP_TOO_MANY_REQUESTS]:
            error = response.content
            LOGGER.error("Error downloading: %s", error)
        else:
            LOGGER.error("Unexpected error (%s)", code)
            code = HTTP_INTERNAL_SERVER_ERROR
    except ValueError:
        LOGGER.error("Malformed response")
    return file_object
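# Hedged sketch of how a public download wrapper might delegate to
# `_download` above. The URL composition is an assumption for illustration;
# the real bindings derive it from the resource type and the connection's
# configured endpoint.
def download_dataset_sketch(api, dataset_id, filename=None):
    """Hypothetical caller: downloads a dataset export to `filename`, or
       returns a file-like object when no filename is given.
    """
    url = "%s%s/download" % (api.url, dataset_id)  # assumed URL scheme
    return api._download(url, filename=filename)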
def _download(self, url, filename=None, wait_time=10, retries=10,
              counter=0):
    """Retrieves a remote file.

       Uses HTTP GET to download a file object with a BigML `url`.
    """
    code = HTTP_INTERNAL_SERVER_ERROR
    file_object = None
    response = requests.get(url + self.auth,
                            verify=self.verify, stream=True)
    code = response.status_code
    if code == HTTP_OK:
        try:
            if counter < retries:
                download_status = json.loads(response.content)
                if download_status and isinstance(download_status, dict):
                    if download_status['status']['code'] != 5:
                        # the export is not finished yet: wait and poll
                        # again
                        time.sleep(get_exponential_wait(wait_time,
                                                        counter))
                        counter += 1
                        return self._download(url, filename=filename,
                                              wait_time=wait_time,
                                              retries=retries,
                                              counter=counter)
                    else:
                        return self._download(url, filename=filename,
                                              wait_time=wait_time,
                                              retries=retries,
                                              counter=retries + 1)
            elif counter == retries:
                LOGGER.error("The maximum number of retries for the"
                             " download has been exceeded. You can retry"
                             " your command again in a while.")
                return None
        except ValueError:
            # When the download starts, response.content is no longer a
            # JSON object and we must retry the download without testing
            # for JSON content. This is achieved by using
            # counter = retries + 1 to single out this case.
            if counter < retries:
                return self._download(url, filename=filename,
                                      wait_time=wait_time,
                                      retries=retries,
                                      counter=retries + 1)
        if filename is None:
            file_object = response.raw
        else:
            file_size = stream_copy(response, filename)
            if file_size == 0:
                LOGGER.error("Error copying file to %s", filename)
            else:
                file_object = filename
    elif code in [HTTP_BAD_REQUEST, HTTP_UNAUTHORIZED, HTTP_NOT_FOUND,
                  HTTP_TOO_MANY_REQUESTS]:
        error = response.content
        LOGGER.error("Error downloading: %s", error)
    else:
        LOGGER.error("Unexpected error (%s)", code)
        code = HTTP_INTERNAL_SERVER_ERROR
    return file_object
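# `stream_copy` is called by both versions of `_download` but not defined in
# this section. A minimal sketch of what such a helper might do (an
# assumption, not the bindings' actual code): copy a streamed HTTP response
# to a local file in chunks and return the number of bytes written.
def stream_copy_sketch(response, filename, chunk_size=4096):
    file_size = 0
    with open(filename, "wb") as file_handle:
        # iter_content yields the response body in chunk_size pieces
        for chunk in response.iter_content(chunk_size=chunk_size):
            if chunk:
                file_handle.write(chunk)
                file_size += len(chunk)
    return file_size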