def test_fails_with_binary_data(self, post_request):
    # shlex.quote doesn't support a bytes object
    # see https://github.com/python/cpython/pull/10871
    post_request.request.headers["Content-Type"] = "application/json; charset=utf-8"
    with pytest.raises(exceptions.CommandError):
        export.curl_command(post_request)
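# A minimal standalone sketch of the limitation the test above guards against:
# shlex.quote() only accepts str, so a raw byte body cannot be shell-quoted and
# the exporter raises CommandError instead. Illustration only, not part of the suite.
import shlex

try:
    shlex.quote(b"\x00\xff")  # type: ignore[arg-type]
except TypeError as e:
    # CPython rejects bytes here with a TypeError such as
    # "cannot use a string pattern on a bytes-like object"
    print(e)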
def test_correct_host_used(self, get_request):
    get_request.request.headers["host"] = "domain:22"
    result = """curl -H 'header: qvalue' -H 'host: domain:22' 'http://domain:22/path?a=foo&a=bar&b=baz'"""
    assert export.curl_command(get_request) == result

    result = """curl --resolve 'domain:22:[192.168.0.1]' -H 'header: qvalue' -H 'host: domain:22' """ \
             """'http://domain:22/path?a=foo&a=bar&b=baz'"""
    assert export.curl_command(get_request, preserve_ip=True) == result
def test_correct_host_used(self, get_request):
    e = export.Export()
    with taddons.context() as tctx:
        tctx.configure(e)
        get_request.request.headers["host"] = "domain:22"
        result = """curl -H 'header: qvalue' -H 'host: domain:22' 'http://domain:22/path?a=foo&a=bar&b=baz'"""
        assert export.curl_command(get_request) == result

        tctx.options.export_preserve_original_ip = True
        result = """curl --resolve 'domain:22:[192.168.0.1]' -H 'header: qvalue' -H 'host: domain:22' """ \
                 """'http://domain:22/path?a=foo&a=bar&b=baz'"""
        assert export.curl_command(get_request) == result
def test_strip_unnecessary(self, get_request):
    get_request.request.headers.clear()
    get_request.request.headers["host"] = "address"
    get_request.request.headers[":authority"] = "address"
    get_request.request.headers["accept-encoding"] = "br"
    result = """curl --compressed 'http://address:22/path?a=foo&a=bar&b=baz'"""
    assert export.curl_command(get_request) == result
def error(self, flow):
    """Checks whether the watchdog should be triggered.

    Triggers the watchdog only if the timeout since the last reset has
    elapsed and flow.error is set (indicating a server-side error).
    HttpSyntaxException errors are ignored, since they can be triggered
    on purpose by a web application scanner.

    Args:
        flow: mitmproxy.http.HTTPFlow
    """
    if (self.not_in_timeout(self.last_trigger, self.timeout)
            and flow.error is not None
            and not isinstance(flow.error, HttpSyntaxException)):
        self.last_trigger = time.time()
        logger.error(f"Watchdog triggered! Cause: {flow}")
        self.error_event.set()

        # Save the request which might have caused the problem. Take the
        # timestamp once so the .curl and .raw dumps share the same basename.
        if flow.request:
            timestamp = datetime.utcnow().isoformat()
            with (self.flow_dir / f"{timestamp}.curl").open("w") as f:
                f.write(curl_command(flow))
            with (self.flow_dir / f"{timestamp}.raw").open("wb") as f:
                f.write(raw(flow))
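# Hedged sketch: `not_in_timeout` is referenced above but not defined in this
# excerpt. An implementation consistent with its use (True when no trigger has
# happened yet, or when `timeout` seconds have passed since the last one) could
# look like this; treat it as an assumption, not the project's confirmed code.
@staticmethod
def not_in_timeout(last_trigger, timeout):
    """Return True if the last trigger lies outside the timeout window."""
    return (last_trigger is None
            or timeout is None
            or time.time() - last_trigger > timeout)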
def test_tcp(self, tcp_flow):
    with pytest.raises(exceptions.CommandError):
        export.curl_command(tcp_flow)
def test_patch(self, patch_request):
    result = """curl -H 'header:qvalue' -H 'content-length:7' -X PATCH """ \
             """'http://address:22/path?query=param' --data-binary 'content'"""
    assert export.curl_command(patch_request) == result
def test_post(self, post_request):
    result = "curl -H 'content-length:256' -X POST 'http://address:22/path' --data-binary '{}'".format(
        str(bytes(range(256)))[2:-1]
    )
    assert export.curl_command(post_request) == result
def test_get(self, get_request):
    result = """curl -H 'header:qvalue' -H 'content-length:0' 'http://address:22/path?a=foo&a=bar&b=baz'"""
    assert export.curl_command(get_request) == result
def test_escape_single_quotes_in_body(self):
    request = tflow.tflow(
        req=tutils.treq(method=b'POST', headers=(), content=b"'&#"))
    command = export.curl_command(request)
    assert shlex.split(command)[-2] == '-d'
    assert shlex.split(command)[-1] == "'&#"
def test_patch(self, patch_request):
    result = """curl -H 'header: qvalue' -X PATCH 'http://address:22/path?query=param' -d content"""
    assert export.curl_command(patch_request) == result
def test_post(self, post_request):
    post_request.request.content = b'nobinarysupport'
    result = "curl -X POST http://address:22/path -d nobinarysupport"
    assert export.curl_command(post_request) == result
def response(self, flow: mitmproxy.http.HTTPFlow):
    # Handle XHR requests, or any URL matching the configured site pattern.
    if (("x-requested-with" in flow.request.headers
            and "XMLHttpRequest" in flow.request.headers["x-requested-with"])
            or self.re_site.match(flow.request.pretty_url) is not None):
        print(f"request_pretty_url: {flow.request.pretty_url}")

        # Rebuild the cookie header as a "key=value;key=value" string.
        cookies_list = []
        for name, value in flow.request.cookies.fields:
            cookies_list.append(f"{name}={value}")
        cookies_string = ";".join(cookies_list)

        # Extract the request headers from the exported curl command.
        curl_cmd = curl_command(flow)
        headers = self.re_header.findall(curl_cmd)

        # Parse the response body and collect links matching the target quality.
        body_decoded = flow.response.content
        soup = BeautifulSoup(body_decoded, "lxml")
        video_list = soup.body.find("div", attrs={"class": "video-list"})
        quality_check = video_list.find_all(
            "span", attrs={"class": "quality"}, string=self.quality)

        work_queue: queue.Queue = queue.Queue()
        for link in quality_check:
            link_url = link.find_previous(
                "a", attrs={"class": "thumb rotator-screen"}).get("href")
            link_parent = link.find_parent("div", attrs={"data-item-id": True})
            work_queue.put((link_url, link_parent))

        num_urls = len(work_queue.queue)
        num_conn = num_urls
        print(f"num_urls: {num_urls}")

        # Fetch the size of each video concurrently, one worker per URL.
        video_sizes = VideoSizes()
        threads = []
        for dummy in range(num_conn):
            t = WorkerThread(work_queue, headers, cookies_string, video_sizes)
            t.start()
            threads.append(t)
        for thread in threads:
            thread.join()

        # Rebuild the video list, ordered by size.
        video_sizes.sort()
        video_list.clear()

        new_links_list = []
        for new_link in video_sizes.values():
            new_links_list.append(new_link[2])
        new_links_string = "".join(new_links_list)
        new_soup = BeautifulSoup(new_links_string, "lxml")
        video_list.append(new_soup)

        flow.response.content = str(soup).encode()
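# Hedged sketch: `VideoSizes` is used above but not defined in this excerpt.
# A minimal thread-safe container consistent with that usage (workers record
# (size, url, html) tuples, results are sorted by size, and values()[i][2]
# yields the rebuilt HTML snippet) might look like this; the method names and
# tuple layout are assumptions, not the project's confirmed code.
import threading


class VideoSizes:
    def __init__(self):
        self._lock = threading.Lock()
        self._items = []

    def add(self, size, url, html):
        # Called concurrently from WorkerThread instances.
        with self._lock:
            self._items.append((size, url, html))

    def sort(self):
        # Workers have joined by the time this runs, so no lock is needed;
        # largest video first is assumed here.
        self._items.sort(key=lambda item: item[0], reverse=True)

    def values(self):
        return list(self._items)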
def _get_data(self, data, flow):
    return "{}\n\n{}\nCurl:\n{}\n\nHttpie:\n{}".format(
        data, "-" * 20, curl_command(flow), httpie_command(flow))