def load_saved_test(self) -> None:
    """Load a saved test by test_id and render it.

    The saved file is a gzip-compressed pickle stored under
    ``self.config.save_dir``. Only the basename of ``self.test_id`` is
    used, so the id cannot point outside that directory.

    Responds with 404 when the file cannot be opened or stat'ed, with 500
    when it cannot be decompressed or unpickled, and otherwise with 200,
    binding the stored resource (or the sub-request named by
    ``self.check_name``, when present) to the selected formatter.
    """

    def fail(status: bytes, phrase: bytes, message: str) -> None:
        # Shared shape of the 404 / 500 error responses.
        self.response_start(
            status, phrase,
            [(b"Content-Type", b"text/html; charset=%s" % self.charset_bytes),
             (b"Cache-Control", b"max-age=600, must-revalidate")])
        self.response_body(self.show_error(message))
        self.response_done([])

    try:
        # TypeError covers a test_id of None reaching os.path.basename.
        fd = gzip.open(
            os.path.join(self.config.save_dir, os.path.basename(self.test_id)))
    except (OSError, TypeError, zlib.error):
        fail(b"404", b"Not Found",
             "I'm sorry, I can't find that saved response.")
        return

    # The context manager closes the file on every exit path, including
    # the error returns below.
    with fd:
        try:
            mtime = os.fstat(fd.fileno()).st_mtime
        except (OSError, TypeError, zlib.error):
            fail(b"404", b"Not Found",
                 "I'm sorry, I can't find that saved response.")
            return
        try:
            # NOTE(security): pickle executes code from the file it loads;
            # this is only safe while save_dir is writable by this
            # application alone.
            top_resource = pickle.load(fd)
        except (pickle.PickleError, OSError, EOFError, zlib.error):
            # gzip decompresses lazily, so a corrupt deflate stream only
            # surfaces here, as zlib.error.
            fail(b"500", b"Internal Server Error",
                 "I'm sorry, I had a problem loading that.")
            return

    # A modification time later than "now" is treated as "already saved".
    is_saved = mtime > thor.time()

    if self.check_name:
        display_resource = top_resource.subreqs.get(
            self.check_name, top_resource)
    else:
        display_resource = top_resource

    formatter = find_formatter(self.format, 'html', top_resource.descend)(
        self.ui_uri, self.config.lang, self.output,
        allow_save=(not is_saved), is_saved=True, test_id=self.test_id)
    content_type = "%s; charset=%s" % (formatter.media_type,
                                       self.config.charset)
    self.response_start(
        b"200", b"OK",
        [(b"Content-Type", content_type.encode('ascii')),
         (b"Cache-Control", b"max-age=3600, must-revalidate")])

    @thor.events.on(formatter)
    def formatter_done() -> None:
        self.response_done([])

    formatter.bind_resource(display_resource)
def load_saved_test(self):
    """Load a saved test by test_id and write it to the response.

    The saved file is a gzip-compressed pickle under the module-level
    ``save_dir``; only the basename of ``self.test_id`` is used. Responds
    404 when the file cannot be opened or stat'ed, 500 when it cannot be
    decompressed or unpickled, otherwise 200 with the formatted state.
    """

    def fail(status, phrase, message):
        # Shared shape of the 404 / 500 error responses.
        self.response_start(status, phrase, [
            ("Content-Type", "text/html; charset=%s" % charset),
            ("Cache-Control", "max-age=600, must-revalidate")
        ])
        # TODO: better error page (through formatter?)
        self.response_body(error_template % message)
        self.response_done([])

    try:
        fd = gzip.open(os.path.join(
            save_dir, os.path.basename(self.test_id)
        ))
    except (OSError, IOError, TypeError, zlib.error):
        fail("404", "Not Found",
             "I'm sorry, I can't find that saved response.")
        return

    # The outer try/finally closes the file on every path, including a
    # failing fstat.
    try:
        try:
            mtime = os.fstat(fd.fileno()).st_mtime
        except (OSError, IOError, TypeError, zlib.error):
            fail("404", "Not Found",
                 "I'm sorry, I can't find that saved response.")
            return
        try:
            # NOTE(security): pickle executes code from the file it loads;
            # only safe while save_dir is written by this application alone.
            state = pickle.load(fd)
        except (pickle.PickleError, IOError, EOFError, zlib.error):
            # gzip decompresses lazily, so corrupt data surfaces here as
            # IOError or zlib.error rather than when the file is opened.
            fail("500", "Internal Server Error",
                 "I'm sorry, I had a problem reading that response.")
            return
    finally:
        fd.close()

    # A modification time later than "now" is treated as "already saved".
    is_saved = mtime > thor.time()

    formatter = find_formatter(self.format, 'html', self.descend)(
        self.base_uri, state.request.uri, state.orig_req_hdrs, lang,
        self.output, allow_save=(not is_saved), is_saved=True,
        test_id=self.test_id
    )
    self.response_start("200", "OK", [
        ("Content-Type", "%s; charset=%s" % (formatter.media_type, charset)),
        ("Cache-Control", "max-age=3600, must-revalidate")
    ])
    if self.check_type:
        # TODO: catch errors (an unknown check_type yields None here)
        state = state.subreqs.get(self.check_type, None)
    formatter.start_output()
    formatter.set_state(state)
    formatter.finish_output()
    self.response_done([])
def run_test(self):
    """Test a URI and stream the formatted result to the response.

    When the module-level ``save_dir`` exists, a temporary file is
    reserved there and its name becomes the test id; once the test
    completes, the droid's state is pickled (gzip-compressed) into that
    file. Failure to reserve or write the file is not reported to the
    client.
    """
    test_id = None
    path = None
    if save_dir and os.path.exists(save_dir):
        try:
            fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
            # mkstemp returns an open descriptor; the file is re-opened by
            # path later, so close it now rather than leak one per test.
            os.close(fd)
            test_id = os.path.split(path)[1]
        except (OSError, IOError):
            # Don't try to store it.
            test_id = None

    formatter = find_formatter(self.format, 'html', self.descend)(
        self.base_uri, self.test_uri, self.req_hdrs, lang, self.output,
        allow_save=test_id, is_saved=False, test_id=test_id,
        descend=self.descend)
    self.response_start("200", "OK", [
        ("Content-Type", "%s; charset=%s" % (formatter.media_type, charset)),
        ("Cache-Control", "max-age=60, must-revalidate")])
    ired = droid.InspectingResourceExpertDroid(
        self.test_uri, req_hdrs=self.req_hdrs, status_cb=formatter.status,
        body_procs=[formatter.feed], descend=self.descend)
    formatter.start_output()

    def done():
        # Passed to ired.run(); finishes the output and stores the state.
        if self.req_type:
            # TODO: catch errors (an unknown req_type yields None here)
            state = ired.state.subreqs.get(self.req_type, None)
        else:
            state = ired.state
        formatter.set_red(state)
        formatter.finish_output()
        self.response_done([])
        if test_id:
            try:
                tmp_file = gzip.open(path, 'w')
                try:
                    pickle.dump(ired.state, tmp_file)
                finally:
                    # Close even when pickling fails part-way.
                    tmp_file.close()
            except (IOError, zlib.error, pickle.PickleError):
                pass  # we don't cry if we can't store it.

    ired.run(done)
def load_saved_test(self):
    """Load a saved test by test_id and write it to the response.

    Reads the gzip-compressed pickle named by the basename of
    ``self.test_id`` from the module-level ``save_dir``. Responds 404 when
    the file cannot be opened or stat'ed, 500 when it cannot be
    decompressed or unpickled, otherwise 200 with the formatted state.
    """
    not_found = "I'm sorry, I can't find that saved response."
    unreadable = "I'm sorry, I had a problem reading that response."

    def fail(status, phrase, message):
        # Shared shape of the 404 / 500 error responses.
        self.response_start(
            status, phrase,
            [("Content-Type", "text/html; charset=%s" % charset),
             ("Cache-Control", "max-age=600, must-revalidate")])
        # TODO: better error page (through formatter?)
        self.response_body(error_template % message)
        self.response_done([])

    try:
        fd = gzip.open(
            os.path.join(save_dir, os.path.basename(self.test_id)))
    except (OSError, IOError, TypeError, zlib.error):
        fail("404", "Not Found", not_found)
        return

    # The outer try/finally closes the file on every path, including a
    # failing fstat.
    try:
        try:
            mtime = os.fstat(fd.fileno()).st_mtime
        except (OSError, IOError, TypeError, zlib.error):
            fail("404", "Not Found", not_found)
            return
        try:
            # NOTE(security): pickle executes code from the file it loads;
            # only safe while save_dir is written by this application alone.
            state = pickle.load(fd)
        except (pickle.PickleError, IOError, EOFError, zlib.error):
            # Decompression is lazy, so corrupt gzip data surfaces here.
            fail("500", "Internal Server Error", unreadable)
            return
    finally:
        fd.close()

    # A modification time later than "now" is treated as "already saved".
    is_saved = mtime > thor.time()

    formatter = find_formatter(self.format, 'html', self.descend)(
        self.base_uri, state.uri, state.orig_req_hdrs, lang, self.output,
        allow_save=(not is_saved), is_saved=True, test_id=self.test_id)
    self.response_start(
        "200", "OK",
        [("Content-Type", "%s; charset=%s" % (formatter.media_type, charset)),
         ("Cache-Control", "max-age=3600, must-revalidate")])
    if self.req_type:
        # TODO: catch errors (an unknown req_type yields None here)
        state = state.subreqs.get(self.req_type, None)
    formatter.start_output()
    formatter.set_red(state)
    formatter.finish_output()
    self.response_done([])
def run_test(self):
    """Test a URI and stream the formatted result to the response.

    When the module-level ``save_dir`` exists, a temporary file is
    reserved there and its name becomes the test id; once the test
    completes, the resource is pickled (gzip-compressed) into that file.
    Failure to reserve or write the file is not reported to the client.
    """
    test_id = None
    path = None
    if save_dir and os.path.exists(save_dir):
        try:
            fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
            # mkstemp returns an open descriptor; the file is re-opened by
            # path later, so close it now rather than leak one per test.
            os.close(fd)
            test_id = os.path.split(path)[1]
        except (OSError, IOError):
            # Don't try to store it.
            test_id = None

    formatter = find_formatter(self.format, 'html', self.descend)(
        self.base_uri, self.test_uri, self.req_hdrs, lang,
        self.output, allow_save=test_id, is_saved=False,
        test_id=test_id, descend=self.descend
    )
    self.response_start("200", "OK", [
        ("Content-Type", "%s; charset=%s" % (formatter.media_type, charset)),
        ("Cache-Control", "max-age=60, must-revalidate")
    ])
    ired = HttpResource(
        self.test_uri, req_hdrs=self.req_hdrs, status_cb=formatter.status,
        body_procs=[formatter.feed], descend=self.descend
    )
    formatter.start_output()

    def done():
        # Passed to ired.run(); finishes the output and stores the result.
        if self.check_type:
            # TODO: catch errors (an unknown check_type yields None here)
            state = ired.subreqs.get(self.check_type, None)
        else:
            state = ired
        formatter.set_state(state)
        formatter.finish_output()
        self.response_done([])
        if test_id:
            try:
                tmp_file = gzip.open(path, 'w')
                try:
                    pickle.dump(ired, tmp_file)
                finally:
                    # Close even when pickling fails part-way.
                    tmp_file.close()
            except (IOError, zlib.error, pickle.PickleError):
                pass  # we don't cry if we can't store it.

    ired.run(done)
def load_saved_test(self) -> None:
    """Load a saved test by test_id and render it.

    Reads the gzip-compressed pickle named by the basename of
    ``self.test_id`` from ``self.config.save_dir``. Responds 404 when the
    file cannot be opened or stat'ed, 500 when it cannot be decompressed
    or unpickled, and otherwise 200, binding the stored resource (or the
    sub-request named by ``self.check_name``) to the selected formatter.
    """
    not_found = "I'm sorry, I can't find that saved response."
    error_headers = [
        (b"Content-Type", b"text/html; charset=%s" % self.charset_bytes),
        (b"Cache-Control", b"max-age=600, must-revalidate")]

    def send_error(status: bytes, phrase: bytes, message: str) -> None:
        # Emit a complete error response.
        self.response_start(status, phrase, error_headers)
        self.response_body(self.show_error(message))
        self.response_done([])

    try:
        # TypeError covers a test_id of None reaching os.path.basename.
        saved_path = os.path.join(self.config.save_dir,
                                  os.path.basename(self.test_id))
        fd = gzip.open(saved_path)
    except (OSError, TypeError, zlib.error):
        send_error(b"404", b"Not Found", not_found)
        return

    # "with" guarantees the handle is closed on every return below.
    with fd:
        try:
            mtime = os.fstat(fd.fileno()).st_mtime
        except (OSError, TypeError, zlib.error):
            send_error(b"404", b"Not Found", not_found)
            return
        try:
            # NOTE(security): pickle executes code from the file it loads;
            # only safe while save_dir is written by this application alone.
            top_resource = pickle.load(fd)
        except (pickle.PickleError, OSError, EOFError, zlib.error):
            # gzip decompresses lazily: a corrupt deflate stream is only
            # detected during this read and is reported as zlib.error.
            send_error(b"500", b"Internal Server Error",
                       "I'm sorry, I had a problem loading that.")
            return

    # A modification time later than "now" is treated as "already saved".
    is_saved = mtime > thor.time()

    if self.check_name:
        display_resource = top_resource.subreqs.get(self.check_name,
                                                    top_resource)
    else:
        display_resource = top_resource

    formatter = find_formatter(self.format, 'html', top_resource.descend)(
        self.ui_uri, self.config.lang, self.output,
        allow_save=(not is_saved), is_saved=True, test_id=self.test_id)
    content_type = "%s; charset=%s" % (formatter.media_type,
                                       self.config.charset)
    self.response_start(b"200", b"OK", [
        (b"Content-Type", content_type.encode('ascii')),
        (b"Cache-Control", b"max-age=3600, must-revalidate")])

    @thor.events.on(formatter)
    def formatter_done() -> None:
        self.response_done([])

    formatter.bind_resource(display_resource)
def run_test(self) -> None:
    """Test a URI and stream the formatted result to the response.

    When ``self.config.save_dir`` exists, a temporary file is reserved
    there and its name becomes the test id. The request is refused with
    403 for a disallowed or repeated Referer header and with 502 when
    ``robots_precheck`` rejects the URI. Otherwise a 200 response is
    started and the resource is checked; once the formatter is done the
    resource is pickled (gzip-compressed) into the reserved file and heavy
    traffic is logged.
    """
    test_id = None
    path = None
    if self.config.save_dir and os.path.exists(self.config.save_dir):
        try:
            fd, path = tempfile.mkstemp(prefix='', dir=self.config.save_dir)
            # mkstemp returns an open descriptor; the file is re-opened by
            # path later, so close it now rather than leak one per test.
            os.close(fd)
            test_id = os.path.split(path)[1]
        except (OSError, IOError):
            # Don't try to store it.
            test_id = None

    top_resource = HttpResource(descend=self.descend)
    self.timeout = thor.schedule(self.config.max_runtime, self.timeoutError,
                                 top_resource.show_task_map)
    top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
    formatter = find_formatter(self.format, 'html', self.descend)(
        self.ui_uri, self.config.lang, self.output,
        allow_save=test_id, is_saved=False, test_id=test_id,
        descend=self.descend)
    content_type = "%s; charset=%s" % (formatter.media_type,
                                       self.config.charset)
    if self.check_name:
        display_resource = top_resource.subreqs.get(self.check_name,
                                                    top_resource)
    else:
        display_resource = top_resource

    # Referer limiting.
    referers = [value for hdr, value in self.req_hdrs
                if hdr.lower() == 'referer']
    referer_error = None
    if len(referers) > 1:
        referer_error = "Multiple referers not allowed."
    if referers and urlsplit(
            referers[0]).hostname in self.config.referer_spam_domains:
        referer_error = "Referer not allowed."
    if referer_error:
        self.response_start(b"403", b"Forbidden", [
            (b"Content-Type", content_type.encode('ascii')),
            (b"Cache-Control", b"max-age=360, must-revalidate")])
        formatter.start_output()
        formatter.error_output(referer_error)
        self.response_done([])
        return

    if not self.robots_precheck(self.test_uri):
        self.response_start(b"502", b"Gateway Error", [
            (b"Content-Type", content_type.encode('ascii')),
            (b"Cache-Control", b"max-age=60, must-revalidate")])
        formatter.start_output()
        formatter.error_output("Forbidden by robots.txt.")
        self.response_done([])
        return

    @thor.events.on(formatter)
    def formatter_done() -> None:
        self.response_done([])
        if test_id:
            try:
                # "with" closes the file even when pickling fails.
                with gzip.open(path, 'w') as tmp_file:
                    pickle.dump(top_resource, tmp_file)
            except (IOError, zlib.error, pickle.PickleError):
                pass  # we don't cry if we can't store it.
        # Total bytes moved by the top resource and everything it linked.
        ti = sum([i.transfer_in for i, t in top_resource.linked],
                 top_resource.transfer_in)
        to = sum([i.transfer_out for i, t in top_resource.linked],
                 top_resource.transfer_out)
        if ti + to > self.config.log_traffic:
            self.error_log("%iK in %iK out for <%s> (descend %s)" % (
                ti / 1024, to / 1024, e_url(self.test_uri),
                str(self.descend)))

    self.response_start(b"200", b"OK", [
        (b"Content-Type", content_type.encode('ascii')),
        (b"Cache-Control", b"max-age=60, must-revalidate")])
    formatter.bind_resource(display_resource)
    top_resource.check()
def run_test(self):
    """Test a URI and stream the formatted result to the response.

    When the module-level ``save_dir`` exists, a temporary file is
    reserved there and its name becomes the test id. The request is
    refused with 403 for a disallowed or repeated Referer header and with
    502 when ``robots_precheck`` rejects the URI. Otherwise the resource
    is run; when it completes the result is formatted, pickled
    (gzip-compressed) into the reserved file, and heavy traffic is logged
    to stderr.
    """
    test_id = None
    path = None
    if save_dir and os.path.exists(save_dir):
        try:
            fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
            # mkstemp returns an open descriptor; the file is re-opened by
            # path later, so close it now rather than leak one per test.
            os.close(fd)
            test_id = os.path.split(path)[1]
        except (OSError, IOError):
            # Don't try to store it.
            test_id = None

    formatter = find_formatter(self.format, 'html', self.descend)(
        self.base_uri, self.test_uri, self.req_hdrs, lang,
        self.output, allow_save=test_id, is_saved=False,
        check_type=self.check_type, test_id=test_id, descend=self.descend
    )
    content_type = "%s; charset=%s" % (formatter.media_type, charset)

    # Referer limiting.
    referers = []
    for hdr, value in self.req_hdrs:
        if hdr.lower() == 'referer':
            referers.append(value)
    referer_error = None
    if len(referers) > 1:
        referer_error = "Multiple referers not allowed."
    if referers and urlsplit(referers[0]).hostname in referer_spam_domains:
        # Message corrected from "Referer now allowed."
        referer_error = "Referer not allowed."
    if referer_error:
        self.response_start("403", "Forbidden", [
            ("Content-Type", content_type),
            ("Cache-Control", "max-age=360, must-revalidate")
        ])
        formatter.start_output()
        self.output(error_template % referer_error)
        self.response_done([])
        return

    if not self.robots_precheck(self.test_uri):
        self.response_start("502", "Gateway Error", [
            ("Content-Type", content_type),
            ("Cache-Control", "max-age=60, must-revalidate")
        ])
        formatter.start_output()
        self.output(error_template % "Forbidden by robots.txt.")
        self.response_done([])
        return

    self.response_start("200", "OK", [
        ("Content-Type", content_type),
        ("Cache-Control", "max-age=60, must-revalidate")
    ])
    resource = HttpResource(
        self.test_uri, req_hdrs=self.req_hdrs, status_cb=formatter.status,
        body_procs=[formatter.feed], descend=self.descend
    )
    formatter.start_output()

    def done():
        # Passed to resource.run(); finishes output, stores and logs.
        if self.check_type:
            state = resource.subreqs.get(self.check_type, resource)
        else:
            state = resource
        formatter.set_state(state)
        formatter.finish_output()
        self.response_done([])
        if test_id:
            try:
                tmp_file = gzip.open(path, 'w')
                try:
                    pickle.dump(resource, tmp_file)
                finally:
                    # Close even when pickling fails part-way.
                    tmp_file.close()
            except (IOError, zlib.error, pickle.PickleError):
                pass  # we don't cry if we can't store it.
        # Total bytes moved by the resource and everything it linked.
        ti = sum([i.transfer_in for i, t in resource.linked],
                 resource.transfer_in)
        to = sum([i.transfer_out for i, t in resource.linked],
                 resource.transfer_out)
        if ti + to > log_traffic:
            sys.stderr.write("%iK in %iK out for <%s> (descend %s)" % (
                ti / 1024, to / 1024, self.test_uri, str(self.descend)
            ))

    resource.run(done)
def run_test(self) -> None:
    """Test a URI, applying the access checks before the test runs.

    In order: reserve a file for the stored result (when ``save_dir`` is
    configured and exists), refuse disallowed or repeated Referer headers
    (403), honour a valid "I am human" robot key or refuse an invalid one
    (403), enforce per-client and per-origin limits (429), and finally
    consult robots.txt. ``continue_test`` is called when the robot key is
    valid or robots.txt allows the fetch.
    """
    # Function-scope import: only needed to build the robots bypass link.
    from urllib.parse import quote

    # try to initialise stored test results
    if self.config.get('save_dir', "") and os.path.exists(self.config['save_dir']):
        try:
            fd, save_path = tempfile.mkstemp(prefix='', dir=self.config['save_dir'])
            # mkstemp returns an open descriptor that nothing here uses;
            # close it rather than leak one per test.
            os.close(fd)
            self.save_path = save_path
            self.test_id = os.path.split(save_path)[1]
        except (OSError, IOError):
            # Don't try to store it.
            self.test_id = None  # should already be None, but make sure

    top_resource = HttpResource(self.config, descend=self.descend)
    self.timeout = thor.schedule(int(self.config['max_runtime']),
                                 self.timeoutError, top_resource.show_task_map)
    top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
    formatter = find_formatter(self.format, 'html', self.descend)(
        self.config, self.output, allow_save=self.test_id, is_saved=False,
        test_id=self.test_id, descend=self.descend)

    def refuse(status: bytes, phrase: bytes, message: str,
               cache_control: bytes = b"max-age=60, must-revalidate") -> None:
        # Send a complete error response through the formatter.
        self.response_start(status, phrase, [
            (b"Content-Type", formatter.content_type()),
            (b"Cache-Control", cache_control)])
        formatter.start_output()
        formatter.error_output(message)
        self.response_done([])

    # referer limiting
    referers = [value for hdr, value in self.req_hdrs
                if hdr.lower() == 'referer']
    referer_error = None
    if len(referers) > 1:
        referer_error = "Multiple referers not allowed."
    if referers and urlsplit(referers[0]).hostname in self.referer_spam_domains:
        referer_error = "Referer not allowed."
    if referer_error:
        refuse(b"403", b"Forbidden", referer_error,
               b"max-age=360, must-revalidate")
        return

    # robot human check
    if self.robot_time and self.robot_time.isdigit() and self.robot_hmac:
        try:
            valid_till = int(self.robot_time)
            # digestmod is mandatory on Python 3.8+; it must match the one
            # used to issue the key in robot() below.
            expected = hmac.new(self._robot_secret,
                                self.robot_time.encode('ascii'),
                                'sha256').hexdigest()
            # Constant-time compare; operands are encoded to bytes because
            # compare_digest rejects non-ASCII str.
            is_valid = hmac.compare_digest(self.robot_hmac.encode('utf-8'),
                                           expected.encode('ascii'))
        except ValueError:
            # isdigit() accepts characters that int() or ASCII encoding
            # reject; treat those keys as invalid.
            valid_till = 0
            is_valid = False
        if is_valid and valid_till >= thor.time():
            self.continue_test(top_resource, formatter)
            return
        refuse(b"403", b"Forbidden", "Naughty.")
        self.error_log("Naughty robot key.")
        # The response is complete; falling through would start a second.
        return

    # enforce client limits
    client_limit = self.config.getint('limit_client_tests', fallback=0)
    if client_limit:
        client_id = self.get_client_id()
        if client_id:
            if self._client_counts.get(client_id, 0) > client_limit:
                refuse(b"429", b"Too Many Requests",
                       "Your client is over limit. Please try later.")
                self.error_log("client over limit: %s" % client_id.decode('idna'))
                return
            self._client_counts[client_id] += 1

    # enforce origin limits
    origin_limit = self.config.getint('limit_origin_tests', fallback=0)
    if origin_limit:
        origin = url_to_origin(self.test_uri)
        if origin:
            if self._origin_counts.get(origin, 0) > origin_limit:
                refuse(b"429", b"Too Many Requests",
                       "Origin is over limit. Please try later.")
                self.error_log("origin over limit: %s" % origin)
                return
            self._origin_counts[origin] += 1

    # check robots.txt
    robot_fetcher = RobotFetcher(self.config)

    @thor.events.on(robot_fetcher)
    def robot(results: Tuple[str, bool]) -> None:
        url, robot_ok = results
        if robot_ok:
            self.continue_test(top_resource, formatter)
            return
        # Issue a key that lets a human bypass the block for 60 seconds.
        valid_till = str(int(thor.time()) + 60)
        robot_hmac = hmac.new(self._robot_secret,
                              valid_till.encode('ascii'), 'sha256')
        # test_uri comes from the request; percent-encode it so it cannot
        # break out of the query parameter or the HTML attribute.
        refuse(b"403", b"Forbidden",
               "This site doesn't allow robots. If you are human, please <a href='?uri=%s&robot_time=%s&robot_hmac=%s'>click here</a>." % (
                   quote(self.test_uri, safe=''), valid_till,
                   robot_hmac.hexdigest()),
               b"no-cache")

    robot_fetcher.check_robots(HttpRequest.iri_to_uri(self.test_uri))
def run_test(self) -> None:
    """Test a URI and stream the formatted result to the response.

    When ``self.config.save_dir`` exists, a temporary file is reserved
    there and its name becomes the test id. The request is refused with
    403 for a disallowed or repeated Referer header and with 502 when
    ``robots_precheck`` rejects the URI. Otherwise a 200 response is
    started and the resource is checked; once the formatter is done the
    resource is pickled (gzip-compressed) into the reserved file and heavy
    traffic is logged.
    """
    test_id = None
    path = None
    if self.config.save_dir and os.path.exists(self.config.save_dir):
        try:
            fd, path = tempfile.mkstemp(prefix='',
                                        dir=self.config.save_dir)
            # mkstemp returns an open descriptor; the file is re-opened by
            # path later, so close it now rather than leak one per test.
            os.close(fd)
            test_id = os.path.split(path)[1]
        except (OSError, IOError):
            # Don't try to store it.
            test_id = None

    top_resource = HttpResource(descend=self.descend)
    self.timeout = thor.schedule(self.config.max_runtime,
                                 self.timeoutError,
                                 top_resource.show_task_map)
    top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
    formatter = find_formatter(self.format, 'html', self.descend)(
        self.ui_uri, self.config.lang, self.output,
        allow_save=test_id, is_saved=False, test_id=test_id,
        descend=self.descend)
    content_type = "%s; charset=%s" % (formatter.media_type,
                                       self.config.charset)
    if self.check_name:
        display_resource = top_resource.subreqs.get(
            self.check_name, top_resource)
    else:
        display_resource = top_resource

    def refuse(status: bytes, phrase: bytes, cache_control: bytes,
               message: str) -> None:
        # Send a complete error response through the formatter.
        self.response_start(
            status, phrase,
            [(b"Content-Type", content_type.encode('ascii')),
             (b"Cache-Control", cache_control)])
        formatter.start_output()
        formatter.error_output(message)
        self.response_done([])

    # Referer limiting.
    referers = []
    for hdr, value in self.req_hdrs:
        if hdr.lower() == 'referer':
            referers.append(value)
    referer_error = None
    if len(referers) > 1:
        referer_error = "Multiple referers not allowed."
    if referers and urlsplit(
            referers[0]).hostname in self.config.referer_spam_domains:
        referer_error = "Referer not allowed."
    if referer_error:
        refuse(b"403", b"Forbidden", b"max-age=360, must-revalidate",
               referer_error)
        return

    if not self.robots_precheck(self.test_uri):
        refuse(b"502", b"Gateway Error", b"max-age=60, must-revalidate",
               "Forbidden by robots.txt.")
        return

    @thor.events.on(formatter)
    def formatter_done() -> None:
        self.response_done([])
        if test_id:
            try:
                # "with" closes the file even when pickling fails.
                with gzip.open(path, 'w') as tmp_file:
                    pickle.dump(top_resource, tmp_file)
            except (IOError, zlib.error, pickle.PickleError):
                pass  # we don't cry if we can't store it.
        # Total bytes moved by the top resource and everything it linked.
        ti = sum([i.transfer_in for i, t in top_resource.linked],
                 top_resource.transfer_in)
        to = sum([i.transfer_out for i, t in top_resource.linked],
                 top_resource.transfer_out)
        if ti + to > self.config.log_traffic:
            self.error_log("%iK in %iK out for <%s> (descend %s)" %
                           (ti / 1024, to / 1024, e_url(
                               self.test_uri), str(self.descend)))

    self.response_start(
        b"200", b"OK",
        [(b"Content-Type", content_type.encode('ascii')),
         (b"Cache-Control", b"max-age=60, must-revalidate")])
    formatter.bind_resource(display_resource)
    top_resource.check()
def load_saved_test(webui: "RedWebUi") -> None:
    """Load a saved test by test_id.

    Opens the gzip-compressed pickle named by the basename of
    ``webui.test_id`` under ``webui.config["save_dir"]``. An OSError or
    TypeError yields a 404 response; a pickle, zlib or EOF error yields a
    500 response. On success the stored resource (or the sub-request
    named by ``webui.check_name``) is bound to the selected formatter and
    a 200 response is started.
    """

    def _send_error(status: bytes, phrase: bytes, message: str) -> None:
        # Both failure responses share headers and differ only in status
        # line and message.
        webui.exchange.response_start(
            status,
            phrase,
            [
                (b"Content-Type", b"text/html; charset=%s" % webui.charset_bytes),
                (b"Cache-Control", b"max-age=600, must-revalidate"),
            ],
        )
        webui.output(message)
        webui.exchange.response_done([])

    try:
        saved_path = os.path.join(
            webui.config["save_dir"], os.path.basename(webui.test_id)
        )
        with gzip.open(saved_path) as saved_file:
            # A modification time later than "now" counts as saved.
            is_saved = os.fstat(saved_file.fileno()).st_mtime > thor.time()
            top_resource = pickle.load(saved_file)
    except (OSError, TypeError):
        _send_error(
            b"404", b"Not Found", "I'm sorry, I can't find that saved response."
        )
        return
    except (pickle.PickleError, zlib.error, EOFError):
        _send_error(
            b"500",
            b"Internal Server Error",
            "I'm sorry, I had a problem loading that.",
        )
        return

    display_resource = (
        top_resource.subreqs.get(webui.check_name, top_resource)
        if webui.check_name
        else top_resource
    )

    formatter_class = find_formatter(webui.format, "html", top_resource.descend)
    formatter = formatter_class(
        webui.config,
        display_resource,
        webui.output,
        allow_save=(not is_saved),
        is_saved=True,
        test_id=webui.test_id,
    )

    ok_headers = [
        (b"Content-Type", formatter.content_type()),
        (b"Cache-Control", b"max-age=3600, must-revalidate"),
    ]
    webui.exchange.response_start(b"200", b"OK", ok_headers)

    @thor.events.on(formatter)
    def formatter_done() -> None:
        webui.exchange.response_done([])

    formatter.bind_resource(display_resource)
def run_test(self) -> None:
    """Test a URI, applying the access checks before the test runs.

    In order: reserve a file for the stored result (when ``save_dir`` is
    configured and exists), refuse disallowed or repeated Referer headers
    (403), honour a valid "I am human" robot key or refuse an invalid one
    (403), enforce per-client and per-origin limits (429), and finally
    consult robots.txt. ``continue_test`` is called when the robot key is
    valid or robots.txt allows the fetch.
    """
    # Function-scope import: only needed to build the robots bypass link.
    from urllib.parse import quote

    # try to initialise stored test results
    if self.config.get("save_dir", "") and os.path.exists(
            self.config["save_dir"]):
        try:
            fd, save_path = tempfile.mkstemp(
                prefix="", dir=self.config["save_dir"])
            # mkstemp returns an open descriptor that nothing here uses;
            # close it rather than leak one per test.
            os.close(fd)
            self.save_path = save_path
            self.test_id = os.path.split(save_path)[1]
        except (OSError, IOError):
            # Don't try to store it.
            self.test_id = None  # should already be None, but make sure

    top_resource = HttpResource(self.config, descend=self.descend)
    self.timeout = thor.schedule(
        int(self.config["max_runtime"]),
        self.timeoutError,
        top_resource.show_task_map,
    )
    top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
    formatter = find_formatter(self.format, "html", self.descend)(
        self.config,
        self.output,
        allow_save=self.test_id,
        is_saved=False,
        test_id=self.test_id,
        descend=self.descend,
    )

    def send_error(
        status: bytes,
        phrase: bytes,
        message: str,
        cache_control: bytes = b"max-age=60, must-revalidate",
    ) -> None:
        # Send a complete error response through the formatter.
        self.response_start(
            status,
            phrase,
            [
                (b"Content-Type", formatter.content_type()),
                (b"Cache-Control", cache_control),
            ],
        )
        formatter.start_output()
        formatter.error_output(message)
        self.response_done([])

    # referer limiting
    referers = []
    for hdr, value in self.req_hdrs:
        if hdr.lower() == "referer":
            referers.append(value)
    referer_error = None
    if len(referers) > 1:
        referer_error = "Multiple referers not allowed."
    if referers and urlsplit(
            referers[0]).hostname in self.referer_spam_domains:
        referer_error = "Referer not allowed."
    if referer_error:
        send_error(b"403", b"Forbidden", referer_error,
                   b"max-age=360, must-revalidate")
        return

    # robot human check
    if self.robot_time and self.robot_time.isdigit() and self.robot_hmac:
        try:
            valid_till = int(self.robot_time)
            # digestmod is mandatory on Python 3.8+; it must match the one
            # used to issue the key in robot() below.
            expected = hmac.new(
                self._robot_secret, self.robot_time.encode("ascii"), "sha256"
            ).hexdigest()
            # Constant-time compare; operands are encoded to bytes because
            # compare_digest rejects non-ASCII str.
            is_valid = hmac.compare_digest(
                self.robot_hmac.encode("utf-8"), expected.encode("ascii"))
        except ValueError:
            # isdigit() accepts characters that int() or ASCII encoding
            # reject; treat those keys as invalid.
            valid_till = 0
            is_valid = False
        if is_valid and valid_till >= thor.time():
            self.continue_test(top_resource, formatter)
            return
        send_error(b"403", b"Forbidden", "Naughty.")
        self.error_log("Naughty robot key.")
        # The response is complete; falling through would start a second.
        return

    # enforce client limits
    client_limit = self.config.getint("limit_client_tests", fallback=0)
    if client_limit:
        client_id = self.get_client_id()
        if client_id:
            if self._client_counts.get(client_id, 0) > client_limit:
                send_error(
                    b"429",
                    b"Too Many Requests",
                    "Your client is over limit. Please try later.",
                )
                self.error_log("client over limit: %s" %
                               client_id.decode("idna"))
                return
            self._client_counts[client_id] += 1

    # enforce origin limits
    origin_limit = self.config.getint("limit_origin_tests", fallback=0)
    if origin_limit:
        origin = url_to_origin(self.test_uri)
        if origin:
            if self._origin_counts.get(origin, 0) > origin_limit:
                send_error(
                    b"429",
                    b"Too Many Requests",
                    "Origin is over limit. Please try later.",
                )
                self.error_log("origin over limit: %s" % origin)
                return
            self._origin_counts[origin] += 1

    # check robots.txt
    robot_fetcher = RobotFetcher(self.config)

    @thor.events.on(robot_fetcher)
    def robot(results: Tuple[str, bool]) -> None:
        url, robot_ok = results
        if robot_ok:
            self.continue_test(top_resource, formatter)
            return
        # Issue a key that lets a human bypass the block for 60 seconds.
        valid_till = str(int(thor.time()) + 60)
        robot_hmac = hmac.new(
            self._robot_secret, valid_till.encode("ascii"), "sha256")
        # test_uri comes from the request; percent-encode it so it cannot
        # break out of the query parameter or the HTML attribute.
        send_error(
            b"403",
            b"Forbidden",
            "This site doesn't allow robots. If you are human, please <a href='?uri=%s&robot_time=%s&robot_hmac=%s'>click here</a>."
            % (quote(self.test_uri, safe=""), valid_till,
               robot_hmac.hexdigest()),
            b"no-cache",
        )

    robot_fetcher.check_robots(HttpRequest.iri_to_uri(self.test_uri))
def run_test(self):
    """Test a URI and stream the formatted result to the response.

    When the module-level ``save_dir`` exists, a temporary file is
    reserved there and its name becomes the test id. The request is
    refused with 403 for a disallowed or repeated Referer header and with
    502 when ``robots_precheck`` rejects the URI. Otherwise the resource
    is run; when it completes the result is formatted and pickled
    (gzip-compressed) into the reserved file.
    """
    test_id = None
    path = None
    if save_dir and os.path.exists(save_dir):
        try:
            fd, path = tempfile.mkstemp(prefix="", dir=save_dir)
            # mkstemp returns an open descriptor; the file is re-opened by
            # path later, so close it now rather than leak one per test.
            os.close(fd)
            test_id = os.path.split(path)[1]
        except (OSError, IOError):
            # Don't try to store it.
            test_id = None

    formatter = find_formatter(self.format, "html", self.descend)(
        self.base_uri,
        self.test_uri,
        self.req_hdrs,
        lang,
        self.output,
        allow_save=test_id,
        is_saved=False,
        test_id=test_id,
        descend=self.descend,
    )
    content_type = "%s; charset=%s" % (formatter.media_type, charset)

    # Referer limiting.
    referers = []
    for hdr, value in self.req_hdrs:
        if hdr.lower() == "referer":
            referers.append(value)
    referer_error = None
    if len(referers) > 1:
        referer_error = "Multiple referers not allowed."
    if referers and urlsplit(referers[0]).hostname in referer_spam_domains:
        # Message corrected from "Referer now allowed."
        referer_error = "Referer not allowed."
    if referer_error:
        self.response_start(
            "403",
            "Forbidden",
            [
                ("Content-Type", content_type),
                ("Cache-Control", "max-age=360, must-revalidate"),
            ],
        )
        formatter.start_output()
        self.output(error_template % referer_error)
        self.response_done([])
        return

    if not self.robots_precheck(self.test_uri):
        self.response_start(
            "502",
            "Gateway Error",
            [
                ("Content-Type", content_type),
                ("Cache-Control", "max-age=60, must-revalidate"),
            ],
        )
        formatter.start_output()
        self.output(error_template % "Forbidden by robots.txt.")
        self.response_done([])
        return

    self.response_start(
        "200",
        "OK",
        [
            ("Content-Type", content_type),
            ("Cache-Control", "max-age=60, must-revalidate"),
        ],
    )
    ired = HttpResource(
        self.test_uri,
        req_hdrs=self.req_hdrs,
        status_cb=formatter.status,
        body_procs=[formatter.feed],
        descend=self.descend,
    )
    formatter.start_output()

    def done():
        # Passed to ired.run(); finishes the output and stores the result.
        if self.check_type:
            # TODO: catch errors (an unknown check_type yields None here)
            state = ired.subreqs.get(self.check_type, None)
        else:
            state = ired
        formatter.set_state(state)
        formatter.finish_output()
        self.response_done([])
        if test_id:
            try:
                tmp_file = gzip.open(path, "w")
                try:
                    pickle.dump(ired, tmp_file)
                finally:
                    # Close even when pickling fails part-way.
                    tmp_file.close()
            except (IOError, zlib.error, pickle.PickleError):
                pass  # we don't cry if we can't store it.

    ired.run(done)
def __init__(self, test_id, test_uri, req_hdrs, base_uri,
             format, output_hdrs, output_body, descend=False, save=False):
    """Handle one request: save, load, run a test, or show the form.

    Exactly one of four branches runs:

    * ``save`` with a ``test_id`` (and a configured ``save_dir``): push
      the stored file's modification time ``save_days`` into the future
      and redirect to it (500 if that fails).
    * ``test_id`` alone: load the stored gzip-compressed pickle and
      render it (404 if it cannot be opened, 500 if it cannot be read).
    * ``test_uri``: run a new test, render it, and store the result when
      ``save_dir`` exists.
    * neither: render the empty form.

    A timeout scheduled at the start is deleted before returning.
    """
    self.output_body = output_body
    self.start = time.time()
    timeout = nbhttp.schedule(max_runtime, self.timeoutError)
    if save and save_dir and test_id:
        try:
            # test_id comes from the request: keep only its final path
            # component so it cannot address files outside save_dir.
            test_id = os.path.basename(test_id)
            if not test_id:
                raise OSError("empty test id")
            os.utime(
                os.path.join(save_dir, test_id),
                (
                    nbhttp.now(),
                    nbhttp.now() + (save_days * 24 * 60 * 60)
                )
            )
            location = "?id=%s" % test_id
            if descend:
                location = "%s&descend=True" % location
            output_hdrs("303 See Other", [
                ("Location", location)
            ])
            output_body("Redirecting...")
        except (OSError, IOError):
            output_hdrs("500 Internal Server Error", [
                ("Content-Type", "text/html; charset=%s" % charset),
            ])
            # TODO: better error message (through formatter?)
            output_body(error_template % "Sorry, I couldn't save that.")
    elif test_id:
        try:
            test_id = os.path.basename(test_id)
            fd = gzip.open(os.path.join(save_dir, test_id))
        except (OSError, IOError, zlib.error):
            output_hdrs("404 Not Found", [
                ("Content-Type", "text/html; charset=%s" % charset),
                ("Cache-Control", "max-age=600, must-revalidate")
            ])
            # TODO: better error page (through formatter?)
            self.output_body(error_template %
                "I'm sorry, I can't find that saved response."
            )
            timeout.delete()
            return
        # The outer try/finally closes the file on every path below.
        try:
            try:
                mtime = os.fstat(fd.fileno()).st_mtime
            except (OSError, IOError, zlib.error):
                output_hdrs("404 Not Found", [
                    ("Content-Type", "text/html; charset=%s" % charset),
                    ("Cache-Control", "max-age=600, must-revalidate")
                ])
                # TODO: better error page (through formatter?)
                self.output_body(error_template %
                    "I'm sorry, I can't find that saved response."
                )
                timeout.delete()
                return
            # A modification time later than "now" means it was saved.
            is_saved = mtime > nbhttp.now()
            try:
                # NOTE(security): pickle executes code from the file it
                # loads; only safe while save_dir is written by this
                # application alone.
                ired = pickle.load(fd)
            except (pickle.PickleError, IOError, EOFError, zlib.error):
                # Decompression is lazy, so corrupt gzip data surfaces
                # here as IOError or zlib.error.
                output_hdrs("500 Internal Server Error", [
                    ("Content-Type", "text/html; charset=%s" % charset),
                    ("Cache-Control", "max-age=600, must-revalidate")
                ])
                # TODO: better error page (through formatter?)
                self.output_body(error_template %
                    "I'm sorry, I had a problem reading that response."
                )
                timeout.delete()
                return
        finally:
            fd.close()
        formatter = find_formatter(format, 'html', descend)(
            base_uri, ired.uri, ired.orig_req_hdrs, lang,
            self.output, allow_save=(not is_saved), is_saved=True,
            test_id=test_id
        )
        output_hdrs("200 OK", [
            ("Content-Type", "%s; charset=%s" % (
                formatter.media_type, charset)),
            ("Cache-Control", "max-age=3600, must-revalidate")
        ])
        formatter.start_output()
        formatter.finish_output(ired)
    elif test_uri:
        test_id = None
        path = None
        if save_dir and os.path.exists(save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
                # mkstemp returns an open descriptor; the file is
                # re-opened by path below, so close it now.
                os.close(fd)
                test_id = os.path.split(path)[1]
            except (OSError, IOError):
                # Don't try to store it.
                test_id = None
        formatter = find_formatter(format, 'html', descend)(
            base_uri, test_uri, req_hdrs, lang,
            self.output, allow_save=test_id, is_saved=False,
            test_id=test_id, descend=descend
        )
        output_hdrs("200 OK", [
            ("Content-Type", "%s; charset=%s" % (
                formatter.media_type, charset)),
            ("Cache-Control", "max-age=60, must-revalidate")
        ])
        formatter.start_output()
        ired = droid.InspectingResourceExpertDroid(
            test_uri,
            req_hdrs=req_hdrs,
            status_cb=formatter.status,
            body_procs=[formatter.feed],
            descend=descend
        )
        formatter.finish_output(ired)
        if test_id:
            try:
                tmp_file = gzip.open(path, 'w')
                try:
                    pickle.dump(ired, tmp_file)
                finally:
                    # Close even when pickling fails part-way.
                    tmp_file.close()
            except (IOError, zlib.error, pickle.PickleError):
                pass  # we don't cry if we can't store it.
    else:  # no test_uri
        formatter = html.BaseHtmlFormatter(
            base_uri, test_uri, req_hdrs, lang, self.output)
        output_hdrs("200 OK", [
            ("Content-Type", "%s; charset=%s" % (
                formatter.media_type, charset)
            ),
            ("Cache-Control", "max-age=300")
        ])
        formatter.start_output()
        formatter.finish_output(None)
    timeout.delete()
def __init__(
    self,
    config: SectionProxy,
    method: str,
    query_string: bytes,
    req_headers: RawHeaderListType,
    req_body: bytes,
    exchange: HttpResponseExchange,
    error_log: Callable[[str], int] = sys.stderr.write,
) -> None:
    """Parse the request and dispatch it to the matching handler.

    The query string (and, for form-encoded POSTs, the body) is decoded
    with the configured charset. POST requests extend a saved test, run
    the Slack integration, record a client error, run a new test, or show
    the default page. GET and HEAD requests load a saved test, complete
    Slack authorisation, or show the default page. Any other method gets
    a 405 response.
    """
    self.config = config  # type: SectionProxy
    charset = self.config["charset"]
    params = parse_qs(query_string.decode(charset, "replace"))
    self.query_string = params
    self.req_headers = req_headers
    self.req_body = req_body
    self.body_args = {}
    self.exchange = exchange
    self.error_log = error_log  # function to log errors to

    # query processing
    self.test_uri = params.get("uri", [""])[0]
    self.test_id = params.get("id", [None])[0]
    req_hdrs = []
    for raw_header in params.get("req_hdr", []):
        # Keep only "name:value" entries with a non-empty name.
        name, colon, value = raw_header.partition(":")
        if colon and name:
            req_hdrs.append((name, value))
    self.req_hdrs = req_hdrs  # type: StrHeaderListType
    self.format = params.get("format", ["html"])[0]
    self.descend = "descend" in params
    # check_name is only honoured when not descending.
    self.check_name = (
        None if self.descend else params.get("check_name", [None])[0]
    )  # type: str
    self.charset_bytes = charset.encode("ascii")
    self.save_path = None  # type: str
    self.timeout = None  # type: Any

    self.start = time.time()

    if method not in ("POST", "GET", "HEAD"):
        self.error_response(
            find_formatter("html")(self.config, None, self.output),
            b"405",
            b"Method Not Allowed",
            "Method Not Allowed",
        )
        return

    if method != "POST":
        # GET / HEAD
        if self.test_id:
            load_saved_test(self)
        elif "code" in params:
            slack_auth(self)
        else:
            self.show_default()
        return

    content_types = get_header(self.req_headers, b"content-type")
    is_form = bool(content_types) and (
        content_types[-1].lower() == b"application/x-www-form-urlencoded"
    )
    if is_form:
        self.body_args = parse_qs(req_body.decode(charset, "replace"))

    wants_save = (
        "save" in params and self.config.get("save_dir", "") and self.test_id
    )
    if wants_save:
        extend_saved_test(self)
    elif "slack" in params:
        slack_run(self)
    elif "client_error" in params:
        self.dump_client_error()
    elif self.test_uri:
        self.run_test()
    else:
        self.show_default()
def run_test(self) -> None:
    """Test a URI.

    Sets up the save file, the resource and its formatter, schedules the
    runtime timeout, then applies the gates in order: Referer limiting
    (403 on failure), rate limiting, and, when both hCaptcha settings
    are configured, the captcha handler. The test continues directly when
    no captcha is configured.
    """
    self.test_id = init_save_file(self)

    top_resource = HttpResource(self.config, descend=self.descend)
    top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)

    formatter_class = find_formatter(self.format, "html", self.descend)
    formatter = formatter_class(
        self.config,
        top_resource,
        self.output,
        allow_save=self.test_id,
        is_saved=False,
        test_id=self.test_id,
        descend=self.descend,
    )

    # Pre-bound callbacks shared by the checks below.
    continue_test = partial(self.continue_test, top_resource, formatter)
    error_response = partial(self.error_response, formatter)

    self.timeout = thor.schedule(
        int(self.config["max_runtime"]),
        self.timeoutError,
        top_resource.show_task_map,
    )

    # referer limiting
    referers = [
        value for name, value in self.req_hdrs if name.lower() == "referer"
    ]
    spam_setting = self.config.get("referer_spam_domains", fallback="")
    referer_spam_domains = [domain.strip() for domain in spam_setting.split()]

    referer_error = None
    if len(referers) > 1:
        referer_error = "Multiple referers not allowed."
    if referer_spam_domains and referers:
        if urlsplit(referers[0]).hostname in referer_spam_domains:
            referer_error = "Referer not allowed."
    if referer_error:
        error_response(b"403", b"Forbidden", referer_error)
        return

    # enforce client limits
    try:
        ratelimiter.process(self, error_response)
    except ValueError:
        return  # over limit, don't continue.

    # hCaptcha
    captcha_configured = self.config.get(
        "hcaptcha_sitekey", ""
    ) and self.config.get("hcaptcha_secret", "")
    if not captcha_configured:
        continue_test()
        return
    CaptchaHandler(
        self,
        self.get_client_id(),
        continue_test,
        error_response,
    ).run()