def request(self, url, method, *args, **kwargs):
    """Proxy a request through ``self.fn`` and capture the response to disk.

    Non-streaming responses are pretty-printed to ``<capture_dir>/<method>-<url>-NNN.json``;
    streaming responses get their ``iter_lines`` wrapped by IterLinesSaver so
    each line is captured as it is consumed.

    :param url: str, request URL (API URI prefixes are stripped for the filename)
    :param method: str, HTTP method, used in the capture filename
    :return: the response (or stream) returned by ``self.fn``
    """
    filename = url
    if filename.startswith(self.openshift_api_uri):
        filename = filename[len(self.openshift_api_uri):]
    if filename.startswith(self.k8s_api_uri):
        filename = filename[len(self.k8s_api_uri):]
    filename = filename.replace('/', '_')
    path = os.path.join(self.capture_dir,
                        "{method}-{url}".format(method=method, url=filename))
    # Number repeated visits to the same endpoint so captures don't overwrite
    visit = self.visited.get(path, 0)
    self.visited[path] = visit + 1
    path += "-{0:0>3}".format(visit)

    if kwargs.get('stream', False):
        stream = self.fn(url, method, *args, **kwargs)
        stream.iter_lines = IterLinesSaver(path, stream.iter_lines).iter_lines
        return stream

    response = self.fn(url, method, *args, **kwargs)
    logger.debug("capturing to %s.json", path)
    encoding = guess_json_utf(response.content)
    try:
        # guess_json_utf() returns None when detection fails; the old code
        # passed None to decode() and crashed with an uncaught TypeError.
        if encoding is None:
            raise ValueError("undetectable JSON encoding")
        decoded = json.loads(response.content.decode(encoding))
    except ValueError:
        # Not JSON (or undecodable): capture the raw bytes unmodified.
        # The old code wrote bytes into a text-mode file, raising TypeError.
        with open(path + ".json", "wb") as outf:
            outf.write(response.content)
    else:
        with open(path + ".json", "w") as outf:
            json.dump(decoded, outf, sort_keys=True, indent=4)
    return response
def json(self, **kwargs):
    """
    Returns the json-encoded content of a response, if any, with the
    leading JSON Security String stripped off.

    :param kwargs: Optional arguments that ``json.loads`` takes.
    :raises ValueError: If the response body does not contain valid json.
    """
    text = self.text
    # JSON RFC 4627 section 3: when no encoding is declared, the body must
    # be UTF-8, -16 or -32 — detect which directly from the bytes. If the
    # detection or decoding fails, fall back to self.text (chardet guess).
    if not self.encoding and self.content and len(self.content) > 3:
        detected = guess_json_utf(self.content)
        if detected is not None:  # nocoverage
            try:
                text = self.content.decode(detected)
            except UnicodeDecodeError:
                # Wrong UTF codec detected; usually because it's not UTF-8
                # but some other 8-bit codec. This is an RFC violation, and
                # the server didn't bother to tell us what codec *was* used.
                pass
    return complexjson.loads(JIVE_SECURITY_RE.sub('', text), **kwargs)
def json(self, check=True):
    """Decode the response body as JSON.

    :param check: bool, when True raise OsbsResponseException for
                  non-success HTTP status codes
    :return: decoded JSON object
    """
    decoded = self.content.decode(guess_json_utf(self.content))
    success_codes = (0, requests.codes.OK, requests.codes.CREATED)
    if check and self.status_code not in success_codes:
        raise OsbsResponseException(decoded, self.status_code)
    return json.loads(decoded)
def request(self, url, method, *args, **kwargs):
    """Proxy a request through ``self.fn`` and capture the response to disk.

    Non-streaming responses are pretty-printed to ``<capture_dir>/<method>-<url>-NNN.json``;
    streaming responses get their ``iter_lines`` wrapped by IterLinesSaver so
    each line is captured as it is consumed.

    :param url: str, request URL (API URI prefixes are stripped for the filename)
    :param method: str, HTTP method, used in the capture filename
    :return: the response (or stream) returned by ``self.fn``
    """
    filename = url
    if filename.startswith(self.openshift_api_uri):
        filename = filename[len(self.openshift_api_uri):]
    if filename.startswith(self.k8s_api_uri):
        filename = filename[len(self.k8s_api_uri):]
    filename = filename.replace('/', '_')
    path = os.path.join(self.capture_dir,
                        "{method}-{url}".format(method=method, url=filename))
    # Number repeated visits to the same endpoint so captures don't overwrite
    visit = self.visited.get(path, 0)
    self.visited[path] = visit + 1
    path += "-{0:0>3}".format(visit)

    if kwargs.get('stream', False):
        stream = self.fn(url, method, *args, **kwargs)
        stream.iter_lines = IterLinesSaver(path, stream.iter_lines).iter_lines
        return stream

    response = self.fn(url, method, *args, **kwargs)
    logger.debug("capturing to %s.json", path)
    encoding = guess_json_utf(response.content)
    try:
        # guess_json_utf() returns None when detection fails; the old code
        # passed None to decode() and crashed with an uncaught TypeError.
        if encoding is None:
            raise ValueError("undetectable JSON encoding")
        decoded = json.loads(response.content.decode(encoding))
    except ValueError:
        # Not JSON (or undecodable): capture the raw bytes unmodified.
        # The old code wrote bytes into a text-mode file, raising TypeError.
        with open(path + ".json", "wb") as outf:
            outf.write(response.content)
    else:
        with open(path + ".json", "w") as outf:
            json.dump(decoded, outf, sort_keys=True, indent=4)
    return response
def call_service(self):
    """Call the remote JSON service and populate the result fields.

    (Python 2 code: uses urllib2 and old-style ``except Exception, e``.)
    On success, sets self.res / self.json and copies the 'ret', 'msg' and
    'data' keys of the decoded body onto self. Returns False on any failure.
    """
    try:
        encode_data = None
        if self.params is not None:
            if self.method == 'GET':
                # GET: parameters are appended to the query string
                self.url += '?' + urlencode(self.params)
                log_debug(self.url)
            elif self.method == 'POST':
                # POST: parameters become the url-encoded request body
                encode_data = urlencode(self.params)
        opener = urllib2.build_opener()
        opener.addheaders = self.headers
        if self.cookie_jar is not None:
            # NOTE(review): this rebuilds the opener, discarding the
            # addheaders assignment above — custom headers are silently
            # dropped when a cookie jar is used; confirm this is intended.
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))
        res_obj = opener.open(self.url, data=encode_data, timeout=self.timeout)
        self.set_cookie = res_obj.info().getheader('Set-Cookie')
        self.res = res_obj.read()
        # Detect the JSON UTF codec (RFC 4627); leave bytes as-is if unknown
        self.encoding = guess_json_utf(self.res)
        if self.encoding:
            self.res = self.res.decode(self.encoding)
        self.json = json.loads(self.res)
        self.ret = self.json.get('ret')
        self.msg = self.json.get('msg')
        self.data = self.json.get('data')
    except Exception, e:
        #log_error('[JSONService] url:%s, response:%s, expetion:%s' % (self.url, self.res, e))
        return False
def guess_manifest_media_type(content):
    """
    Guess the media type for the given manifest content

    :param content: JSON content of manifest (bytes)
    :return: media type (str), or None if unable to guess
    """
    encoding = guess_json_utf(content)
    try:
        manifest = json.loads(content.decode(encoding))
    except (ValueError,            # not valid JSON
            TypeError,             # encoding is None / content not bytes
            UnicodeDecodeError):   # bytes don't match the detected codec
        logger.exception("Unable to decode JSON")
        logger.debug("response content (%s): %r", encoding, content)
        return None

    if 'mediaType' in manifest:
        return manifest['mediaType']
    # No mediaType key: schema version 1 implies the v1 media type
    if manifest.get('schemaVersion') == 1:
        return get_manifest_media_type('v1')
    logger.warning("no mediaType or schemaVersion=1 in manifest, keys: %s",
                   manifest.keys())
def json(self):
    """Parse the response body as JSON, preferring a detected UTF codec.

    Falls back to ``self.text`` when detection fails or the bytes do not
    actually match the detected codec.
    """
    detected = guess_json_utf(self.content)
    if detected is not None:
        try:
            return json.loads(self.content.decode(detected))
        except UnicodeDecodeError:
            # wrong codec guessed; fall through to the text fallback
            pass
    return json.loads(self.text)
def r_json(self, **kwargs):
    """Decode ``self.r_content`` as JSON, detecting the UTF codec when no
    encoding is declared; fall back to ``self.r_text`` on failure.

    :param kwargs: optional keyword arguments forwarded to ``complexjson.loads``
    """
    if not self.encoding and self.r_content and len(self.r_content) > 3:
        detected = guess_json_utf(self.r_content)
        if detected is not None:
            try:
                return complexjson.loads(self.r_content.decode(detected), **kwargs)
            except UnicodeDecodeError:
                # detection picked the wrong codec; use the text fallback
                pass
    return complexjson.loads(self.r_text, **kwargs)
def json(self, check=True):
    """Decode the response body as JSON.

    :param check: bool, when True raise OsbsResponseException for
                  non-success HTTP status codes
    :raises OsbsResponseException: bad status code, or corrupt JSON body
    """
    encoding = guess_json_utf(self.content)
    text = self.content.decode(encoding)
    if check and self.status_code not in (0, requests.codes.OK, requests.codes.CREATED):
        raise OsbsResponseException(text, self.status_code)
    try:
        return json.loads(text)
    except ValueError:
        # fix: the original message misspelled "HtttpResponse"
        msg = '{0}Headers {1}\nContent {2}'.format('HttpResponse has corrupt json:\n',
                                                   self.headers, self.content)
        logger.exception(msg)
        raise OsbsResponseException(msg, self.status_code)
def __init__(self, message, status_code, *args, **kwargs):
    """Store the HTTP status code and, when possible, the decoded JSON body.

    Attempts to decode an OpenShift Status object from *message*:
    https://docs.openshift.org/latest/rest_api/openshift_v1.html#v1-status
    ``self.json`` is None when the message is not valid JSON.
    """
    super(OsbsResponseException, self).__init__(message, *args, **kwargs)
    self.status_code = status_code
    text = message
    if isinstance(text, six.binary_type):
        text = text.decode(guess_json_utf(text))
    try:
        self.json = json.loads(text)
    except ValueError:
        self.json = None
def iter_lines(self):
    """Yield lines from ``self.fn()``, capturing each to a numbered JSON file.

    Each line is pretty-printed to ``<self.path>-NNN.json`` when it is valid
    JSON, otherwise captured as raw bytes.
    """
    encoding = None
    for line in self.fn():
        path = "{f}-{n:0>3}.json".format(f=self.path, n=self.line)
        logger.debug("capturing to %s", path)
        # Detect the stream codec once, from the first line
        if not encoding:
            encoding = guess_json_utf(line)
        try:
            # guess_json_utf() may return None; the old code passed None to
            # decode() and crashed with an uncaught TypeError.
            if encoding is None:
                raise ValueError("undetectable JSON encoding")
            decoded = json.loads(line.decode(encoding))
        except ValueError:
            # Not JSON: capture the raw bytes unmodified. The old code wrote
            # bytes into a text-mode file, which raises TypeError on Python 3.
            with open(path, "wb") as outf:
                outf.write(line)
        else:
            with open(path, "w") as outf:
                json.dump(decoded, outf, sort_keys=True, indent=4)
        self.line += 1
        yield line
def get_json(self, **kwargs):
    """
    Quoted from: requests.models.json()

    Decode the content as JSON. Tries chardet first when no encoding is
    set, then RFC 4627 UTF detection, then falls back to ``get_text()``.
    """
    if not self.encoding:
        # No declared encoding: let chardet take a best guess first
        self.encoding = chardet.detect(self.content)["encoding"]
    if not self.encoding and self.content and len(self.content) > 3:
        # JSON RFC 4627 section 3: the body must be UTF-8, -16 or -32;
        # detect which. On failure fall back to get_text() below.
        detected = guess_json_utf(self.content)
        if detected is not None:
            try:
                return json.loads(self.content.decode(detected), **kwargs)
            except UnicodeDecodeError:
                # wrong UTF codec detected (RFC violation by the server)
                pass
    return json.loads(self.get_text(), **kwargs)
def _json(content, text):
    """Decode *content* using RFC 4627 UTF detection, else parse *text*.

    Closure: reads ``self``, ``kwargs`` and ``complexjson`` from the
    enclosing scope.
    """
    if not self.encoding and content and len(content) > 3:
        # No encoding set. JSON RFC 4627 section 3 says the body must be
        # UTF-8, -16 or -32; detect which one is in use.
        detected = guess_json_utf(content)
        if detected is not None:
            try:
                return complexjson.loads(content.decode(detected), **kwargs)
            except UnicodeDecodeError:
                # Wrong UTF codec detected; usually because it's not UTF-8
                # but some other 8-bit codec — an RFC violation, and the
                # server didn't say what codec *was* used. Fall back to the
                # pre-decoded text.
                pass
    return complexjson.loads(text, **kwargs)
def watch_resource(self, resource_type, resource_name=None, **request_args):
    """Watch an OpenShift resource and yield (event_type, object) tuples.

    Reconnects up to WATCH_RETRY times when the server closes the stream,
    sleeping WATCH_RETRY_SECS between attempts.

    :param resource_type: str, resource collection to watch
    :param resource_name: str or None, watch a single named object if given
    :param request_args: extra arguments passed to the URL builder
    """
    path = "watch/namespaces/%s/%s/" % (self.namespace, resource_type)
    if resource_name is not None:
        path += "%s/" % resource_name
    url = self._build_url(path, _prepend_namespace=False, **request_args)
    for retry in range(WATCH_RETRY):
        response = self._get(url, stream=True, headers={'Connection': 'close'})
        check_response(response)
        # The stream's UTF codec is detected once, from its first line
        encoding = None
        for line in response.iter_lines():
            logger.debug(line)
            if not encoding:
                encoding = guess_json_utf(line)
            try:
                j = json.loads(line.decode(encoding))
            except ValueError:
                # skip malformed events rather than aborting the watch
                logger.error("Cannot decode watch event: %s", line)
                continue
            if 'object' not in j:
                logger.error("Watch event has no 'object': %s", j)
                continue
            if 'type' not in j:
                logger.error("Watch event has no 'type': %s", j)
                continue
            yield (j['type'].lower(), j['object'])
        # server closed the stream; back off before reconnecting
        logger.debug("connection closed, reconnecting in %ds", WATCH_RETRY_SECS)
        time.sleep(WATCH_RETRY_SECS)
def _try_to_serialize_response(self, resp):
    """Deserialize *resp* via the configured serializer when possible.

    Returns None for bodiless 204/205 responses; returns the raw content
    when no serializer is available or decoding fails.
    """
    serializer = self.serializer
    # 204 No Content / 205 Reset Content carry no body by definition
    if resp.status_code in [204, 205]:
        return

    content_type_header = resp.headers.get("content-type", None)
    if not (content_type_header and resp.content):
        return resp.content

    content_type = content_type_header.split(';')[0].strip()
    try:
        stype = serializer.get_serializer(content_type=content_type)
    except SerializerNotAvailable:
        return resp.content

    if not isinstance(resp.content, bytes):
        return stype.loads(resp.content)
    try:
        encoding = guess_json_utf(resp.content)
        return stype.loads(resp.content.decode(encoding))
    except Exception:
        # undetectable/undecodable content: hand back the raw bytes
        return resp.content
def __process(self, data=None):
    """Decode an incoming Telegram update and dispatch it to ``on_message``.

    :param data: bytes, raw JSON update payload; ignored when falsy
    """
    if not data:
        return
    import telebot
    # Decode with the detected JSON UTF codec (RFC 4627), defaulting to
    # UTF-8. The old code called data.decode() with no codec and passed
    # `encoding=` to json.loads, a kwarg that is ignored on Python 3 and
    # removed entirely in 3.9.
    encoding = guess_json_utf(data) or 'utf-8'
    json_data = json.loads(data.decode(encoding))
    update = telebot.types.Update.de_json(json_data)
    telegram_message = update.message
    from python_bot.bot.bot import bot_logger
    bot_logger.debug("Received message: %s" % telegram_message)
    user = telegram_message.from_user
    message = create_message(telegram_message.content_type,
                             user=UserInfo(user.id, user.first_name,
                                           user.last_name, user.username),
                             date=telegram_message.date,
                             text=telegram_message.text,
                             message_id=user.id)
    if not message:
        # message is falsy here, so report the content type from the
        # original telegram message (the old code dereferenced the falsy
        # `message.content_Type`, raising AttributeError).
        bot_logger.debug("Failed to initiate message data for content type [%s]"
                         % telegram_message.content_type)
        return
    self.on_message(message, data, extra=telegram_message)
def test_encoded(self, encoding):
    """An empty JSON object encoded with *encoding* is detected as such."""
    payload = '{}'.encode(encoding)
    assert guess_json_utf(payload) == encoding
def test_guess_by_bom(self, encoding, expected):
    """A BOM-prefixed payload is detected via its byte-order mark."""
    payload = u'\ufeff{}'.encode(encoding)
    assert guess_json_utf(payload) == expected
def test_bad_utf_like_encoding(self):
    """Four NUL bytes match no valid UTF stream, so detection returns None."""
    result = guess_json_utf(b'\x00\x00\x00\x00')
    assert result is None
def get_manifest_digests(image, registry, insecure=False, dockercfg_path=None,
                         versions=('v1', 'v2', 'v2_list', 'oci', 'oci_index'),
                         require_digest=True):
    """Return manifest digest for image.

    :param image: ImageName, the remote image to inspect
    :param registry: str, URI for registry, if URI schema is not provided,
                     https:// will be used
    :param insecure: bool, when True registry's cert is not verified
    :param dockercfg_path: str, dirname of .dockercfg location
    :param versions: tuple, which manifest schema versions to fetch digest
    :param require_digest: bool, when True exception is thrown if no digest is
                           set in the headers.
    :return: dict, versions mapped to their digest
    """
    registry_session = RegistrySession(registry, insecure=insecure,
                                       dockercfg_path=dockercfg_path)
    digests = {}
    # If all of the media types return a 404 NOT_FOUND status, then we rethrow
    # an exception, if all of the media types fail for some other reason - like
    # bad headers - then we return a ManifestDigest object with no digests.
    # This is interesting for the Pulp "retry until the manifest shows up" case.
    all_not_found = True
    saved_not_found = None
    for version in versions:
        media_type = get_manifest_media_type(version)
        headers = {'Accept': media_type}
        try:
            response = query_registry(
                registry_session, image, digest=None, version=version)
            all_not_found = False
        except (HTTPError, RetryError, Timeout) as ex:
            # remember the 404 so it can be rethrown if every version 404s
            if ex.response.status_code == requests.codes.not_found:
                saved_not_found = ex
            else:
                all_not_found = False
            # If the registry has a v2 manifest that can't be converted into a v1
            # manifest, the registry fails with status=400 (BAD_REQUEST), and an
            # error code of MANIFEST_INVALID. Note that if the registry has a v2
            # manifest and you ask for an OCI manifest, the registry will try to
            # convert the v2 manifest into a v1 manifest as the default type, so
            # the same thing occurs.
            if version != 'v2' and ex.response.status_code == requests.codes.bad_request:
                logger.warning('Unable to fetch digest for %s, got error %s',
                               media_type, ex.response.status_code)
                continue
            # Returned if the manifest could not be retrieved for the given
            # media type
            elif (ex.response.status_code == requests.codes.not_found or
                  ex.response.status_code == requests.codes.not_acceptable):
                continue
            else:
                raise

        received_media_type = None
        try:
            received_media_type = response.headers['Content-Type']
        except KeyError:
            # Guess content_type from contents
            try:
                encoding = guess_json_utf(response.content)
                manifest = json.loads(response.content.decode(encoding))
                received_media_type = manifest['mediaType']
            except (ValueError,   # not valid JSON
                    KeyError) as ex:  # no mediaType key
                logger.warning("Unable to fetch media type: neither Content-Type header "
                               "nor mediaType in output was found")

        if not received_media_type:
            continue

        # Only compare prefix as response may use +prettyjws suffix
        # which is the case for signed manifest
        response_h_prefix = received_media_type.rsplit('+', 1)[0]
        request_h_prefix = media_type.rsplit('+', 1)[0]
        if response_h_prefix != request_h_prefix:
            logger.debug('request headers: %s', headers)
            logger.debug('response headers: %s', response.headers)
            logger.warning('Received media type %s mismatches the expected %s',
                           received_media_type, media_type)
            continue

        # set it to truthy value so that koji_import would know pulp supports
        # these digests
        digests[version] = True
        logger.debug('Received media type %s', received_media_type)

        if not response.headers.get('Docker-Content-Digest'):
            logger.warning('Unable to fetch digest for %s, no Docker-Content-Digest header',
                           media_type)
            continue

        digests[version] = response.headers['Docker-Content-Digest']
        context = '/'.join([x for x in [image.namespace, image.repo] if x])
        tag = image.tag or 'latest'
        logger.debug('Image %s:%s has %s manifest digest: %s',
                     context, tag, version, digests[version])

    if not digests:
        # every requested version 404'd: surface the saved 404 exception
        if all_not_found and len(versions) > 0:
            raise saved_not_found
        if require_digest:
            raise RuntimeError('No digests found for {}'.format(image))

    return ManifestDigest(**digests)
def get_manifest_digests(image, registry, insecure=False, dockercfg_path=None,
                         versions=('v1', 'v2', 'v2_list'), require_digest=True):
    """Return manifest digest for image.

    :param image: ImageName, the remote image to inspect
    :param registry: str, URI for registry, if URI schema is not provided,
                     https:// will be used
    :param insecure: bool, when True registry's cert is not verified
    :param dockercfg_path: str, dirname of .dockercfg location
    :param versions: tuple, which manifest schema versions to fetch digest
    :param require_digest: bool, when True exception is thrown if no digest is
                           set in the headers.
    :return: dict, versions mapped to their digest
    """
    digests = {}
    for version in versions:
        media_type = get_manifest_media_type(version)
        headers = {'Accept': media_type}
        try:
            response = query_registry(
                image, registry, digest=None, insecure=insecure,
                dockercfg_path=dockercfg_path, version=version)
        except (HTTPError, RetryError) as ex:
            # If the registry has a v2 manifest that can't be converted into a v1
            # manifest, the registry fails with status=400, and a error code of
            # MANIFEST_INVALID.
            if version == 'v1' and ex.response.status_code == 400:
                logger.warning('Unable to fetch digest for %s, got error %s',
                               media_type, ex.response.status_code)
                continue
            else:
                raise

        received_media_type = None
        try:
            received_media_type = response.headers['Content-Type']
        except KeyError:
            # Guess content_type from contents
            try:
                encoding = guess_json_utf(response.content)
                manifest = json.loads(response.content.decode(encoding))
                received_media_type = manifest['mediaType']
            except (ValueError,   # not valid JSON
                    KeyError) as ex:  # no mediaType key
                logger.warning(
                    "Unable to fetch media type: neither Content-Type header "
                    "nor mediaType in output was found")
                logger.debug("exception: %r", ex)
                logger.debug("response content: %r", response.content)
                logger.debug("response headers: %s", response.headers)

        if not received_media_type:
            continue

        # Only compare prefix as response may use +prettyjws suffix
        # which is the case for signed manifest
        response_h_prefix = received_media_type.rsplit('+', 1)[0]
        request_h_prefix = media_type.rsplit('+', 1)[0]
        if response_h_prefix != request_h_prefix:
            logger.debug('request headers: %s', headers)
            logger.debug('response headers: %s', response.headers)
            # fix: the original passed (media_type, received_media_type),
            # which swaps the received/expected values in the message
            logger.warning('Received media type %s mismatches the expected %s',
                           received_media_type, media_type)
            continue

        # set it to truthy value so that koji_import would know pulp supports
        # these digests
        digests[version] = True
        logger.debug('Received media type %s', received_media_type)

        if not response.headers.get('Docker-Content-Digest'):
            logger.warning(
                'Unable to fetch digest for %s, no Docker-Content-Digest header',
                media_type)
            continue

        digests[version] = response.headers['Docker-Content-Digest']
        context = '/'.join([x for x in [image.namespace, image.repo] if x])
        tag = image.tag or 'latest'
        logger.debug('Image %s:%s has %s manifest digest: %s',
                     context, tag, version, digests[version])

    if not digests and require_digest:
        raise RuntimeError('No digests found for {}'.format(image))

    return ManifestDigest(**digests)
def test_encoded_number(self, encoding):
    """A bare JSON number is enough content for codec detection."""
    payload = '0'.encode(encoding)
    assert guess_json_utf(payload) == encoding
def test_encoded_euro_sign(self, encoding):
    """A non-ASCII character must not confuse codec detection."""
    payload = '"\u20AC"'.encode(encoding)
    assert guess_json_utf(payload) == encoding
def get_manifest_digests(image, registry, insecure=False, dockercfg_path=None,
                         versions=('v1', 'v2', 'v2_list', 'oci', 'oci_index'),
                         require_digest=True):
    """Return manifest digest for image.

    :param image: ImageName, the remote image to inspect
    :param registry: str, URI for registry, if URI schema is not provided,
                     https:// will be used
    :param insecure: bool, when True registry's cert is not verified
    :param dockercfg_path: str, dirname of .dockercfg location
    :param versions: tuple, which manifest schema versions to fetch digest
    :param require_digest: bool, when True exception is thrown if no digest is
                           set in the headers.
    :return: dict, versions mapped to their digest
    """
    registry_session = RegistrySession(registry, insecure=insecure,
                                       dockercfg_path=dockercfg_path)
    digests = {}
    # If all of the media types return a 404 NOT_FOUND status, then we rethrow
    # an exception, if all of the media types fail for some other reason - like
    # bad headers - then we return a ManifestDigest object with no digests.
    # This is interesting for the Pulp "retry until the manifest shows up" case.
    all_not_found = True
    saved_not_found = None
    for version in versions:
        media_type = get_manifest_media_type(version)
        headers = {'Accept': media_type}
        try:
            response = query_registry(registry_session, image, digest=None,
                                      version=version)
            all_not_found = False
        except (HTTPError, RetryError, Timeout) as ex:
            # remember the 404 so it can be rethrown if every version 404s
            if ex.response.status_code == requests.codes.not_found:
                saved_not_found = ex
            else:
                all_not_found = False
            # If the registry has a v2 manifest that can't be converted into a v1
            # manifest, the registry fails with status=400 (BAD_REQUEST), and an
            # error code of MANIFEST_INVALID. Note that if the registry has a v2
            # manifest and you ask for an OCI manifest, the registry will try to
            # convert the v2 manifest into a v1 manifest as the default type, so
            # the same thing occurs.
            if version != 'v2' and ex.response.status_code == requests.codes.bad_request:
                logger.warning('Unable to fetch digest for %s, got error %s',
                               media_type, ex.response.status_code)
                continue
            # Returned if the manifest could not be retrieved for the given
            # media type
            elif (ex.response.status_code == requests.codes.not_found or
                  ex.response.status_code == requests.codes.not_acceptable):
                continue
            else:
                raise

        received_media_type = None
        try:
            received_media_type = response.headers['Content-Type']
        except KeyError:
            # Guess content_type from contents
            try:
                encoding = guess_json_utf(response.content)
                manifest = json.loads(response.content.decode(encoding))
                received_media_type = manifest['mediaType']
            except (
                    ValueError,   # not valid JSON
                    KeyError) as ex:  # no mediaType key
                logger.warning(
                    "Unable to fetch media type: neither Content-Type header "
                    "nor mediaType in output was found")

        if not received_media_type:
            continue

        # Only compare prefix as response may use +prettyjws suffix
        # which is the case for signed manifest
        response_h_prefix = received_media_type.rsplit('+', 1)[0]
        request_h_prefix = media_type.rsplit('+', 1)[0]
        if response_h_prefix != request_h_prefix:
            logger.debug('request headers: %s', headers)
            logger.debug('response headers: %s', response.headers)
            logger.warning('Received media type %s mismatches the expected %s',
                           received_media_type, media_type)
            continue

        # set it to truthy value so that koji_import would know pulp supports
        # these digests
        digests[version] = True
        logger.debug('Received media type %s', received_media_type)

        if not response.headers.get('Docker-Content-Digest'):
            logger.warning(
                'Unable to fetch digest for %s, no Docker-Content-Digest header',
                media_type)
            continue

        digests[version] = response.headers['Docker-Content-Digest']
        context = '/'.join([x for x in [image.namespace, image.repo] if x])
        tag = image.tag or 'latest'
        logger.debug('Image %s:%s has %s manifest digest: %s',
                     context, tag, version, digests[version])

    if not digests:
        # every requested version 404'd: surface the saved 404 exception
        if all_not_found and len(versions) > 0:
            raise saved_not_found
        if require_digest:
            raise RuntimeError('No digests found for {}'.format(image))

    return ManifestDigest(**digests)
def watch_resource(self, resource_type, resource_name=None, **request_args):
    """
    Generator function which yields tuples of (change_type, object) where:

    - change_type is one of:
      - 'modified', the object was modified
      - 'deleted', the object was deleted
      - None, a fresh version of the object was retrieved using GET
        (only when resource_name is provided)

    - object is the latest version of the object
    """
    def log_and_sleep():
        # back off before re-establishing the watch connection
        logger.debug("connection closed, reconnecting in %ds", WATCH_RETRY_SECS)
        time.sleep(WATCH_RETRY_SECS)

    watch_path = "watch/namespaces/%s/%s/" % (self.namespace, resource_type)
    if resource_name is not None:
        watch_path += "%s/" % resource_name
    api_ver = OCP_RESOURCE_API_VERSION_MAP[resource_type]
    watch_url = self._build_url(api_ver, watch_path, _prepend_namespace=False,
                                **request_args)
    get_url = None
    if resource_name is not None:
        get_url = self._build_url(api_ver, "%s/%s" % (resource_type, resource_name))

    bad_responses = 0
    for _ in range(WATCH_RETRY):
        logger.debug("watching for updates")
        try:
            response = self._get(watch_url, stream=True,
                                 headers={'Connection': 'close'})
            check_response(response)
        # we're already retrying, so there's no need to panic just because of
        # a bad response
        except OsbsResponseException as exc:
            bad_responses += 1
            if bad_responses > MAX_BAD_RESPONSES:
                raise exc
            else:
                # check_response() already logged the message, so just report
                # that we're sleeping and retry
                log_and_sleep()
                continue

        # the stream's UTF codec is detected once, from its first line
        encoding = None

        # Avoid races. We've already asked the server to tell us
        # about changes to the object, but now ask for a fresh
        # copy of the object as well. This is to catch the
        # situation where the object changed before the call to
        # this method, or in between retries in this method.
        if get_url is not None:
            logger.debug("retrieving fresh version of object")
            fresh_response = self._get(get_url)
            check_response(fresh_response)
            yield None, fresh_response.json()

        for line in response.iter_lines():
            logger.debug('%r', line)
            if not encoding:
                encoding = guess_json_utf(line)
            try:
                j = json.loads(line.decode(encoding))
            except ValueError:
                # skip malformed events rather than aborting the watch
                logger.error("Cannot decode watch event: %s", line)
                continue
            if 'object' not in j:
                logger.error("Watch event has no 'object': %s", j)
                continue
            if 'type' not in j:
                logger.error("Watch event has no 'type': %s", j)
                continue
            yield (j['type'].lower(), j['object'])

        log_and_sleep()
def watch_resource(self, api_path, api_version, resource_type, resource_name,
                   **request_args):
    """
    Watch for changes in openshift object and return it's json representation
    after each update to the object

    :param api_path: str, API prefix for the URL
    :param api_version: str, API version for the URL
    :param resource_type: str, resource collection to watch
    :param resource_name: str, name of the watched object
    :param request_args: extra arguments passed to the URL builder
    """
    def log_and_sleep():
        # back off before re-establishing the watch connection
        logger.debug("connection closed, reconnecting in %ds", WATCH_RETRY_SECS)
        time.sleep(WATCH_RETRY_SECS)

    watch_path = f"watch/namespaces/{self.namespace}/{resource_type}/{resource_name}/"
    watch_url = self.build_url(api_path, api_version, watch_path,
                               _prepend_namespace=False, **request_args)
    get_url = self.build_url(api_path, api_version,
                             f"{resource_type}/{resource_name}")

    bad_responses = 0
    for _ in range(WATCH_RETRY):
        logger.debug("watching for updates for %s, %s", resource_type, resource_name)
        try:
            response = self.get(watch_url, stream=True,
                                headers={'Connection': 'close'})
            check_response(response)
        # we're already retrying, so there's no need to panic just because of
        # a bad response
        except OsbsResponseException as exc:
            bad_responses += 1
            if bad_responses > MAX_BAD_RESPONSES:
                raise exc
            else:
                # check_response() already logged the message, so just report
                # that we're sleeping and retry
                log_and_sleep()
                continue

        for line in response.iter_lines():
            encoding = guess_json_utf(line)
            try:
                j = json.loads(line.decode(encoding))
            except ValueError:
                # skip malformed events rather than aborting the watch
                logger.error("Cannot decode watch event: %s", line)
                continue
            if 'object' not in j:
                logger.error("Watch event has no 'object': %s", j)
                continue
            if 'type' not in j:
                logger.error("Watch event has no 'type': %s", j)
                continue

            # Avoid races. We've already asked the server to tell us
            # about changes to the object, but now ask for a fresh
            # copy of the object as well. This is to catch the
            # situation where the object changed before the call to
            # this method, or in between retries in this method.
            logger.debug("retrieving fresh version of object %s", resource_name)
            fresh_response = self.get(get_url)
            check_response(fresh_response)
            yield fresh_response.json()

        log_and_sleep()