def parse_it(raw_headers: Any) -> Headers:
    """
    Just decode anything that could contain headers. That simple PERIOD.

    :param raw_headers: Accept bytes, str, fp, dict, email.Message, requests.Response,
        urllib3.HTTPResponse and httpx.Response.
    :return: A Headers instance built from every parsed entry whose name is accepted by
        is_legal_header_name.
    :raises TypeError: If passed argument cannot be parsed to extract headers from it.
    """
    headers: Optional[Iterable[Tuple[str, Any]]] = None

    if isinstance(raw_headers, str):
        headers = HeaderParser().parsestr(raw_headers, headersonly=True).items()
    elif isinstance(raw_headers, (bytes, RawIOBase)):
        # Binary input (or the content read from a raw stream) is decoded first;
        # the decoded part is fed back into parse_it and the remainder is discarded.
        payload = (
            raw_headers
            if isinstance(raw_headers, bytes)
            else raw_headers.read() or b""
        )
        decoded, _ = extract_encoded_headers(payload)
        return parse_it(decoded)
    elif isinstance(raw_headers, (Mapping, Message)):
        headers = raw_headers.items()
    else:
        # Third-party responses are recognised by their qualified class name so
        # that none of these libraries has to be imported here.
        class_name = extract_class_name(type(raw_headers))

        if class_name == "requests.models.Response":
            # Go through the underlying raw headers and use getlist() so that a
            # header present several times yields one tuple per occurrence.
            raw_response_headers = raw_headers.raw.headers
            headers = [
                (header_name, header_content)
                for header_name in raw_response_headers
                for header_content in raw_response_headers.getlist(header_name)
            ]
        elif class_name in (
            "httpx._models.Response",
            "urllib3.response.HTTPResponse",
        ):
            headers = raw_headers.headers.items()

    if headers is None:
        raise TypeError(
            "Cannot parse type {type_} as it is not supported by kiss-header.".format(
                type_=type(raw_headers)
            )
        )

    revised_headers: List[Tuple[str, str]] = decode_partials(headers)

    # Sometime raw content does not begin with headers. If that is the case, search for the next line.
    # The type is checked before anything else: response objects are not guaranteed
    # to support len(), so probing their length first could raise an unrelated
    # TypeError when they carry no headers. Only str can reach this point, bytes
    # input has already returned above.
    if not revised_headers and isinstance(raw_headers, str) and raw_headers:
        next_iter = raw_headers.split("\n", maxsplit=1)

        if len(next_iter) >= 2:
            return parse_it(next_iter[-1])

    # Prepare Header objects
    list_of_headers: List[Header] = []

    for head, content in revised_headers:
        # We should ignore when a illegal name is considered as an header.
        # We avoid ValueError (in __init__ of Header)
        if is_legal_header_name(head) is False:
            continue

        entries: List[str] = header_content_split(content, ",")

        # Multiple entries are detected in one content at the only exception that
        # its not IMAP header "Subject".
        if len(entries) > 1 and normalize_str(head) != "subject":
            list_of_headers.extend(Header(head, entry) for entry in entries)
        else:
            list_of_headers.append(Header(head, content))

    return Headers(*list_of_headers)
def parse_it(raw_headers: Any) -> Headers:
    """
    Just decode anything that could contain headers. That simple PERIOD.

    :param raw_headers: Accept str, bytes, an IOBase stream (handed as-is to
        BytesHeaderParser.parse), a Mapping, requests.Response and httpx.Response.
    :return: A Headers instance built from every (name, decoded content) pair found.
    :raises TypeError: If passed argument cannot be parsed to extract headers from it.
    """
    headers: Optional[Iterable[Tuple[str, Any]]] = None

    if isinstance(raw_headers, str):
        headers = HeaderParser().parsestr(raw_headers, headersonly=True).items()
    elif isinstance(raw_headers, (bytes, IOBase)):
        # Raw bytes are wrapped in a BytesIO so both cases go through the same parser call.
        stream = BytesIO(raw_headers) if isinstance(raw_headers, bytes) else raw_headers
        headers = (
            BytesHeaderParser().parse(stream, headersonly=True).items()  # type: ignore
        )
    elif isinstance(raw_headers, Mapping):
        headers = raw_headers.items()
    else:
        # Third-party responses are recognised by the qualified class name found in
        # str(type(...)) so that none of these libraries has to be imported here.
        r = findall(r"<class '([a-zA-Z0-9._]+)'>", str(type(raw_headers)))
        class_name = r[0] if r else None

        if class_name == "requests.models.Response":
            # Go through the underlying raw headers and use getlist() so that a
            # header present several times yields one tuple per occurrence.
            raw_response_headers = raw_headers.raw.headers
            headers = [
                (header_name, header_content)
                for header_name in raw_response_headers
                for header_content in raw_response_headers.getlist(header_name)
            ]
        elif class_name == "httpx._models.Response":
            headers = raw_headers.headers.items()

    if headers is None:
        raise TypeError(
            "Cannot parse type {type_} as it is not supported by kiss-header.".format(
                type_=type(raw_headers)
            )
        )

    revised_headers = []

    for head, content in headers:
        # decode_header yields (fragment, charset) pairs; fragments are either
        # already str or bytes that still have to be decoded.
        fragments = []

        for partial, partial_encoding in decode_header(content):
            if isinstance(partial, str):
                fragments.append(partial)
            elif isinstance(partial, bytes):
                fragments.append(
                    partial.decode(
                        partial_encoding if partial_encoding is not None else "utf-8",
                        errors="ignore",
                    )
                )

        revised_headers.append((head, "".join(fragments)))

    # Sometime raw content does not begin with headers. If that is the case, search for the next line.
    # The type is checked before anything else: streams (and response objects) do
    # not support len(), so probing their length first raised an unrelated
    # TypeError whenever such an input carried no headers.
    if not revised_headers and isinstance(raw_headers, (bytes, str)) and raw_headers:
        separator = b"\n" if isinstance(raw_headers, bytes) else "\n"
        next_iter = raw_headers.split(separator, maxsplit=1)  # type: ignore

        if len(next_iter) >= 2:
            return parse_it(next_iter[-1])

    return Headers([Header(head, content) for head, content in revised_headers])