def parse_combined(path):
    """Split a combined file into its inbound and outbound streams.

    The URI scheme is derived from the file name: a ``.https`` suffix
    means ``https``, a ``.noscheme`` suffix means no scheme (`None`),
    and anything else means ``http``.

    The file is read as bytes and is expected to contain, in order:
    a UTF-8 preamble, the inbound marker line, the inbound data,
    the outbound marker line, and the outbound data.

    :param path: path to the combined file (passed through `decode_path`).
    :return: a tuple ``(inbound, outbound, scheme, preamble)``,
        where `inbound` and `outbound` are `Stream` objects
        and `preamble` is the decoded text before the inbound marker.
    :raises InputError: if a marker is missing
        or the preamble is not valid UTF-8.
    """
    inbound_marker = b'======== BEGIN INBOUND STREAM ========\r\n'
    outbound_marker = b'======== BEGIN OUTBOUND STREAM ========\r\n'

    path = decode_path(path)

    # Default to plain HTTP unless the file name says otherwise.
    scheme = u'http'
    if path.endswith(u'.https'):
        scheme = u'https'
    elif path.endswith(u'.noscheme'):
        scheme = None

    with io.open(path, 'rb') as combined_file:
        raw = combined_file.read()

    # Everything before the first inbound marker is the preamble.
    (preamble_bytes, found, remainder) = raw.partition(inbound_marker)
    if not found:
        raise InputError(u'%s: bad combined file: no inbound marker' % path)
    try:
        preamble = preamble_bytes.decode('utf-8')
    except UnicodeError as exc:     # pragma: no cover
        raise InputError(u'%s: invalid UTF-8 in preamble' % path) from exc

    # The first outbound marker separates the two directions.
    (inbound_data, found, outbound_data) = remainder.partition(outbound_marker)
    if not found:       # pragma: no cover
        raise InputError(u'%s: bad combined file: no outbound marker' % path)

    inbound = Stream(io.BufferedReader(io.BytesIO(inbound_data)),
                     name=path + u' (inbound)')
    outbound = Stream(io.BufferedReader(io.BytesIO(outbound_data)),
                      name=path + u' (outbound)')
    return (inbound, outbound, scheme, preamble)
def _parse_paths(inbound_path, outbound_path, scheme=u'http'):
    """Yield the exchanges parsed from a pair of stream files.

    Either path may be empty or `None`; in that case `None` is passed
    to `parse_streams` in place of the corresponding stream.

    :param inbound_path: path to the file with the inbound stream, if any.
    :param outbound_path: path to the file with the outbound stream, if any.
    :param scheme: scheme forwarded to `parse_streams`.
    :return: a generator of whatever `parse_streams` produces.

    Every file opened here is closed when the generator finishes,
    fails, or is closed early.
    """
    opened_files = []
    try:
        streams = []
        for stream_path in (inbound_path, outbound_path):
            if not stream_path:
                streams.append(None)
                continue
            stream_file = io.open(stream_path, 'rb')
            # Register the file before wrapping it, so that it is closed
            # even if constructing the `Stream` fails.
            opened_files.append(stream_file)
            streams.append(
                Stream(stream_file, name=decode_path(stream_path)))
        (inbound, outbound) = streams
        for exchange in parse_streams(inbound, outbound, scheme):
            yield exchange
    finally:
        for stream_file in opened_files:
            stream_file.close()
def _process_request(data, creator, path):
    """Build a `Request` from one request object of a HAR file.

    :param data: the request mapping. The keys ``method``, ``url`` and
        ``bodySize`` are read unconditionally; ``postData`` is optional,
        and its ``text`` and ``mimeType`` are consulted when present.
    :param creator: describes the program that exported the file
        (``is_chrome``, ``is_firefox``, ``is_insomnia``, ``is_fiddler``
        flags and `reconstruct_insomnia_version`); used to undo
        exporter-specific quirks.
    :param path: only used in the remark attached to the request.
    :return: the `Request`, with `is_to_proxy` explicitly set to `None`.
    """
    (version, header_entries, pseudo_headers) = _process_message(data, creator)

    if creator.is_chrome and version == http11 and u':host' in pseudo_headers:
        # SPDY exported from Chrome.
        version = None

    if creator.is_firefox and version == http2:
        # Firefox exports "Connection: keep-alive" on HTTP/2 requests
        # (which triggers notice 1244)
        # even though it does not actually send it
        # (this can be verified with SSLKEYLOGFILE + Wireshark).
        spurious = (h.connection, u'keep-alive')
        header_entries = [(name, value)
                          for (name, value) in header_entries
                          if (name, value) != spurious]

    method = data['method']
    header_names = set(name for (name, _) in header_entries)

    url = data['url']
    parsed = urlparse(url)
    scheme = parsed.scheme

    if creator.is_insomnia:
        # https://github.com/getinsomnia/insomnia/issues/840
        # Fill in the headers that are absent from the export.
        # All three checks look at the names as originally exported.
        if h.host not in header_names:
            header_entries.insert(0, (h.host, parsed.netloc))
        if h.user_agent not in header_names:
            # The version is whatever the creator object reconstructs.
            insomnia_version = creator.reconstruct_insomnia_version()
            header_entries.append(
                (h.user_agent, u'insomnia/%s' % insomnia_version))
        if h.accept not in header_names:
            header_entries.append((h.accept, u'*/*'))
        header_names = set(name for (name, _) in header_entries)

    if method == m.CONNECT:
        target = parsed.netloc
    elif h.host not in header_names:
        # If the request has no ``Host`` header,
        # the user won't be able to see the target host
        # unless we set the full URL ("absolute form") as the target.
        # To prevent this from having an effect on the proxy logic,
        # `Request.is_to_proxy` is explicitly set to `None` below.
        target = url
    else:
        # With HAR, we can't tell if the request was to a proxy
        # or to a server. So we force most requests
        # into the "origin form" of the target.
        target = parsed.path
        if parsed.query:
            target = target + u'?' + parsed.query

    body_size = data['bodySize']
    if body_size == 0:
        # No body, or a body of length 0 (which we do not distinguish).
        body = b''
    elif body_size > 0:
        # A message body was present, but we cannot recover it,
        # because message body is the body *with* ``Content-Encoding``,
        # and HAR does not include that.
        body = Unavailable()
    else:
        # Unknown. Maybe there was a body, maybe there wasn't.
        body = None

    post = data.get('postData') or {}
    # An empty or missing text counts as no text at all.
    text = post.get('text') or None

    if text is not None:
        is_firefox_form = (
            creator.is_firefox and
            post['mimeType'] == media.application_x_www_form_urlencoded and
            u'\r\n' in text
        )
        if is_firefox_form:
            # Yes, Firefox actually outputs this stuff. Go figure.
            # Everything before the last CRLF is tried as header fields;
            # only if that parses do we keep the tail as the actual text.
            (leading, trailing) = text.rsplit(u'\r\n', 1)
            try:
                reader = io.BufferedReader(
                    io.BytesIO(leading.encode('iso-8859-1')))
                extra_entries = framing1.parse_header_fields(Stream(reader))
            except (UnicodeError, ParseError):     # pragma: no cover
                pass
            else:
                header_entries.extend(extra_entries)
                text = trailing

        if creator.is_fiddler and method == m.CONNECT and u'Fiddler' in text:
            # Fiddler's HAR export adds a body with debug information
            # to CONNECT requests.
            text = None
            body = b''

    req = Request(scheme, method, target, version, header_entries, body,
                  remark=u'from %s' % path)
    if text is not None:
        req.unicode_body = text
    # HAR does not tell us whether the request went to a proxy,
    # so don't let the form of the target decide that.
    req.is_to_proxy = None
    return req