Beispiel #1
0
    def load_saved_test(self) -> None:
        """Load a saved test by test_id and hand it to a formatter.

        The test is read as a gzip-compressed pickle named after the basename
        of ``self.test_id`` inside ``self.config.save_dir``.

        Responses:
          * 404 when the file cannot be opened or stat'ed (this includes a
            ``test_id`` of ``None``, which raises ``TypeError``);
          * 500 when the content cannot be decompressed or unpickled;
          * 200 otherwise, after which the loaded resource -- or the
            sub-request named by ``self.check_name`` if it exists -- is bound
            to the formatter and the response is finished from the
            ``formatter_done`` handler.
        """

        def send_error(status: bytes, phrase: bytes, message: str) -> None:
            """Send a complete HTML error response."""
            self.response_start(
                status, phrase,
                [(b"Content-Type",
                  b"text/html; charset=%s" % self.charset_bytes),
                 (b"Cache-Control", b"max-age=600, must-revalidate")])
            self.response_body(self.show_error(message))
            self.response_done([])

        not_found = "I'm sorry, I can't find that saved response."
        try:
            # basename() keeps the lookup inside save_dir.
            fd = gzip.open(
                os.path.join(self.config.save_dir,
                             os.path.basename(self.test_id)))
        except (OSError, TypeError, zlib.error):
            send_error(b"404", b"Not Found", not_found)
            return

        # The context manager closes the file on every exit path, including a
        # failing fstat(), which used to leave the handle open.
        with fd:
            try:
                mtime = os.fstat(fd.fileno()).st_mtime
            except (OSError, TypeError, zlib.error):
                send_error(b"404", b"Not Found", not_found)
                return
            try:
                # NOTE(security): unpickling can execute arbitrary code; this
                # is only safe while save_dir holds files written by this
                # application itself.
                top_resource = pickle.load(fd)
            except (pickle.PickleError, OSError, EOFError, zlib.error):
                # zlib.error is raised while reading a corrupt deflate
                # stream, so it has to be handled here, not only at open.
                send_error(b"500", b"Internal Server Error",
                           "I'm sorry, I had a problem loading that.")
                return

        # NOTE(review): a modification time in the future is treated as
        # "already saved" -- presumably the save action pushes mtime forward;
        # confirm against the code that performs the save.
        is_saved = mtime > thor.time()

        if self.check_name:
            display_resource = top_resource.subreqs.get(
                self.check_name, top_resource)
        else:
            display_resource = top_resource

        formatter = find_formatter(self.format, 'html', top_resource.descend)(
            self.ui_uri,
            self.config.lang,
            self.output,
            allow_save=(not is_saved),
            is_saved=True,
            test_id=self.test_id)
        content_type = "%s; charset=%s" % (formatter.media_type,
                                           self.config.charset)

        self.response_start(
            b"200", b"OK",
            [(b"Content-Type", content_type.encode('ascii')),
             (b"Cache-Control", b"max-age=3600, must-revalidate")])

        @thor.events.on(formatter)
        def formatter_done() -> None:
            self.response_done([])

        formatter.bind_resource(display_resource)
Beispiel #2
0
    def load_saved_test(self):
        """Load a saved test by test_id and render it.

        Reads the gzip-compressed pickle stored under the basename of
        ``self.test_id`` in ``save_dir``.  Sends a 404 page if the file
        cannot be opened or stat'ed, a 500 page if it cannot be decompressed
        or unpickled, and otherwise a 200 response rendered by the formatter
        selected from ``self.format``.

        ``save_dir``, ``charset``, ``lang`` and ``error_template`` are names
        from the enclosing module scope.
        """
        def send_error(status, phrase, message):
            """Send a complete error response built from error_template."""
            self.response_start(
                status, phrase, [
                ("Content-Type", "text/html; charset=%s" % charset),
                ("Cache-Control", "max-age=600, must-revalidate")
            ])
            # TODO: better error page (through formatter?)
            self.response_body(error_template % message)
            self.response_done([])

        not_found = "I'm sorry, I can't find that saved response."
        try:
            # basename() keeps the lookup inside save_dir.
            fd = gzip.open(os.path.join(
                save_dir, os.path.basename(self.test_id)
            ))
        except (OSError, IOError, TypeError, zlib.error):
            send_error("404", "Not Found", not_found)
            return

        # From here on the file is open; the finally clause closes it on
        # every path, including a failing fstat() that used to leak it.
        try:
            try:
                mtime = os.fstat(fd.fileno()).st_mtime
            except (OSError, IOError, TypeError, zlib.error):
                send_error("404", "Not Found", not_found)
                return
            try:
                # NOTE(security): unpickling can execute arbitrary code; only
                # safe while save_dir holds files written by this application.
                state = pickle.load(fd)
            except (pickle.PickleError, IOError, EOFError, zlib.error):
                # A corrupt gzip stream surfaces as IOError or zlib.error
                # while reading, not as a pickle error.
                send_error("500", "Internal Server Error",
                           "I'm sorry, I had a problem reading that response.")
                return
        finally:
            fd.close()

        # NOTE(review): a modification time in the future is treated as
        # "already saved"; confirm against the code that performs the save.
        is_saved = mtime > thor.time()

        formatter = find_formatter(self.format, 'html', self.descend)(
            self.base_uri, state.request.uri, state.orig_req_hdrs, lang,
            self.output, allow_save=(not is_saved), is_saved=True,
            test_id=self.test_id
        )
        self.response_start(
            "200", "OK", [
            ("Content-Type", "%s; charset=%s" % (
                formatter.media_type, charset)),
            ("Cache-Control", "max-age=3600, must-revalidate")
        ])
        if self.check_type:
            # Fall back to the top-level state when the requested sub-request
            # was not recorded, rather than handing None to the formatter.
            state = state.subreqs.get(self.check_type, state)

        formatter.start_output()
        formatter.set_state(state)
        formatter.finish_output()
        self.response_done([])
Beispiel #3
0
    def run_test(self):
        """Test a URI.

        Reserves a uniquely named file in ``save_dir`` (when that directory
        is configured and exists) whose name becomes the test id, starts a
        200 response, and runs an ``InspectingResourceExpertDroid`` against
        ``self.test_uri``.  When the droid calls back, the result is written
        through the formatter, the response is finished, and the droid state
        is pickled into the reserved file on a best-effort basis.
        """
        test_id = None
        path = None
        if save_dir and os.path.exists(save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
            except (OSError, IOError):
                # Don't try to store it.
                pass
            else:
                # mkstemp() returns an already-open OS-level descriptor that
                # the caller must close.  Only the file name is needed here
                # (the file is reopened by name in done()), so close it now
                # instead of leaking one descriptor per request.
                os.close(fd)
                test_id = os.path.split(path)[1]

        formatter = find_formatter(self.format, 'html',
                                   self.descend)(self.base_uri,
                                                 self.test_uri,
                                                 self.req_hdrs,
                                                 lang,
                                                 self.output,
                                                 allow_save=test_id,
                                                 is_saved=False,
                                                 test_id=test_id,
                                                 descend=self.descend)

        self.response_start("200", "OK",
                            [("Content-Type", "%s; charset=%s" %
                              (formatter.media_type, charset)),
                             ("Cache-Control", "max-age=60, must-revalidate")])

        ired = droid.InspectingResourceExpertDroid(self.test_uri,
                                                   req_hdrs=self.req_hdrs,
                                                   status_cb=formatter.status,
                                                   body_procs=[formatter.feed],
                                                   descend=self.descend)
        formatter.start_output()

        def done():
            """Render the finished test, end the response, then save it."""
            if self.req_type:
                # Fall back to the top-level state when the requested
                # sub-request does not exist, rather than passing None on.
                state = ired.state.subreqs.get(self.req_type, ired.state)
            else:
                state = ired.state
            formatter.set_red(state)
            formatter.finish_output()
            self.response_done([])
            if test_id:
                try:
                    tmp_file = gzip.open(path, 'w')
                    try:
                        pickle.dump(ired.state, tmp_file)
                    finally:
                        # Close even when pickling fails part-way through.
                        tmp_file.close()
                except (IOError, OSError, zlib.error, pickle.PickleError):
                    pass  # we don't cry if we can't store it.

        ired.run(done)
Beispiel #4
0
    def load_saved_test(self):
        """Load a saved test by test_id and render it.

        The saved test is a gzip-compressed pickle stored under the basename
        of ``self.test_id`` in ``save_dir``.  A file that cannot be opened or
        stat'ed produces a 404 page, content that cannot be decompressed or
        unpickled produces a 500 page, and anything else is rendered with a
        200 response by the formatter selected from ``self.format``.

        ``save_dir``, ``charset``, ``lang`` and ``error_template`` are names
        from the enclosing module scope.
        """
        def fail(status, phrase, message):
            """Send a complete error response built from error_template."""
            self.response_start(
                status, phrase,
                [("Content-Type", "text/html; charset=%s" % charset),
                 ("Cache-Control", "max-age=600, must-revalidate")])
            # TODO: better error page (through formatter?)
            self.response_body(error_template % message)
            self.response_done([])

        missing = "I'm sorry, I can't find that saved response."
        try:
            # basename() keeps the lookup inside save_dir.
            fd = gzip.open(
                os.path.join(save_dir, os.path.basename(self.test_id)))
        except (OSError, IOError, TypeError, zlib.error):
            fail("404", "Not Found", missing)
            return

        # The file is open from here on; the finally clause closes it on
        # every path, including a failing fstat() that used to leak it.
        try:
            try:
                mtime = os.fstat(fd.fileno()).st_mtime
            except (OSError, IOError, TypeError, zlib.error):
                fail("404", "Not Found", missing)
                return
            try:
                # NOTE(security): unpickling can execute arbitrary code; only
                # safe while save_dir holds files written by this application.
                state = pickle.load(fd)
            except (pickle.PickleError, IOError, EOFError, zlib.error):
                # A damaged gzip stream raises IOError or zlib.error while
                # being read, not a pickle error.
                fail("500", "Internal Server Error",
                     "I'm sorry, I had a problem reading that response.")
                return
        finally:
            fd.close()

        # NOTE(review): a modification time in the future is treated as
        # "already saved"; confirm against the code that performs the save.
        is_saved = mtime > thor.time()

        formatter = find_formatter(self.format, 'html',
                                   self.descend)(self.base_uri,
                                                 state.uri,
                                                 state.orig_req_hdrs,
                                                 lang,
                                                 self.output,
                                                 allow_save=(not is_saved),
                                                 is_saved=True,
                                                 test_id=self.test_id)
        self.response_start(
            "200", "OK", [("Content-Type", "%s; charset=%s" %
                           (formatter.media_type, charset)),
                          ("Cache-Control", "max-age=3600, must-revalidate")])
        if self.req_type:
            # Fall back to the top-level state when the requested sub-request
            # was not recorded, rather than handing None to the formatter.
            state = state.subreqs.get(self.req_type, state)

        formatter.start_output()
        formatter.set_red(state)
        formatter.finish_output()
        self.response_done([])
Beispiel #5
0
    def run_test(self):
        """Test a URI.

        Reserves a uniquely named file in ``save_dir`` (when that directory
        is configured and exists) whose name becomes the test id, starts a
        200 response, and runs an ``HttpResource`` against ``self.test_uri``.
        When the resource calls back, the result is written through the
        formatter, the response is finished, and the resource is pickled
        into the reserved file on a best-effort basis.
        """
        test_id = None
        path = None
        if save_dir and os.path.exists(save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
            except (OSError, IOError):
                # Don't try to store it.
                pass
            else:
                # mkstemp() returns an open OS-level descriptor that the
                # caller owns.  The file is reopened by name in done(), so
                # close the descriptor now rather than leak one per request.
                os.close(fd)
                test_id = os.path.split(path)[1]

        formatter = find_formatter(self.format, 'html', self.descend)(
            self.base_uri, self.test_uri, self.req_hdrs, lang,
            self.output, allow_save=test_id, is_saved=False,
            test_id=test_id, descend=self.descend
        )

        self.response_start(
            "200", "OK", [
            ("Content-Type", "%s; charset=%s" % (
                formatter.media_type, charset)),
            ("Cache-Control", "max-age=60, must-revalidate")
        ])

        ired = HttpResource(
            self.test_uri,
            req_hdrs=self.req_hdrs,
            status_cb=formatter.status,
            body_procs=[formatter.feed],
            descend=self.descend
        )
        formatter.start_output()

        def done():
            """Render the finished test, end the response, then save it."""
            if self.check_type:
                # Fall back to the top-level resource when the requested
                # sub-request does not exist, rather than passing None on.
                state = ired.subreqs.get(self.check_type, ired)
            else:
                state = ired
            formatter.set_state(state)
            formatter.finish_output()
            self.response_done([])
            if test_id:
                try:
                    tmp_file = gzip.open(path, 'w')
                    try:
                        pickle.dump(ired, tmp_file)
                    finally:
                        # Close even when pickling fails part-way through.
                        tmp_file.close()
                except (IOError, OSError, zlib.error, pickle.PickleError):
                    pass # we don't cry if we can't store it.

        ired.run(done)
Beispiel #6
0
    def load_saved_test(self) -> None:
        """Load a saved test by test_id and hand it to a formatter.

        The saved test is a gzip-compressed pickle named after the basename
        of ``self.test_id`` in ``self.config.save_dir``.  The response is 404
        when that file cannot be opened or stat'ed (a ``test_id`` of ``None``
        ends up here via ``TypeError``), 500 when it cannot be decompressed
        or unpickled, and 200 otherwise.  On success the loaded resource --
        or the sub-request named by ``self.check_name``, if present -- is
        bound to the formatter, and the response is finished from the
        ``formatter_done`` handler.
        """
        def fail(status: bytes, phrase: bytes, message: str) -> None:
            """Send a complete HTML error response."""
            self.response_start(status, phrase, [
                (b"Content-Type", b"text/html; charset=%s" % self.charset_bytes),
                (b"Cache-Control", b"max-age=600, must-revalidate")])
            self.response_body(self.show_error(message))
            self.response_done([])

        missing = "I'm sorry, I can't find that saved response."
        try:
            # basename() keeps the lookup inside save_dir.
            fd = gzip.open(os.path.join(self.config.save_dir, os.path.basename(self.test_id)))
        except (OSError, TypeError, zlib.error):
            fail(b"404", b"Not Found", missing)
            return

        # "with" closes the file on every exit path; previously a failing
        # fstat() returned without closing it.
        with fd:
            try:
                mtime = os.fstat(fd.fileno()).st_mtime
            except (OSError, TypeError, zlib.error):
                fail(b"404", b"Not Found", missing)
                return
            try:
                # NOTE(security): unpickling can execute arbitrary code; only
                # safe while save_dir holds files written by this application.
                top_resource = pickle.load(fd)
            except (pickle.PickleError, OSError, EOFError, zlib.error):
                # A corrupt deflate stream raises zlib.error during the read.
                fail(b"500", b"Internal Server Error", "I'm sorry, I had a problem loading that.")
                return

        # NOTE(review): a modification time in the future is treated as
        # "already saved"; confirm against the code that performs the save.
        is_saved = mtime > thor.time()

        if self.check_name:
            display_resource = top_resource.subreqs.get(self.check_name, top_resource)
        else:
            display_resource = top_resource

        formatter = find_formatter(self.format, 'html', top_resource.descend)(
            self.ui_uri, self.config.lang, self.output,
            allow_save=(not is_saved), is_saved=True, test_id=self.test_id)
        content_type = "%s; charset=%s" % (formatter.media_type, self.config.charset)

        self.response_start(b"200", b"OK", [
            (b"Content-Type", content_type.encode('ascii')),
            (b"Cache-Control", b"max-age=3600, must-revalidate")])

        @thor.events.on(formatter)
        def formatter_done() -> None:
            self.response_done([])

        formatter.bind_resource(display_resource)
Beispiel #7
0
    def run_test(self) -> None:
        """Test a URI.

        Steps, in order:
          1. reserve a uniquely named file in ``self.config.save_dir`` (if
             configured and present); its name becomes the test id;
          2. create the ``HttpResource`` and schedule ``self.timeoutError``
             after ``self.config.max_runtime``;
          3. refuse with 403 when more than one Referer header is sent or
             the first Referer's host is in
             ``self.config.referer_spam_domains``;
          4. refuse with 502 when ``self.robots_precheck`` rejects the URI;
          5. otherwise send 200, bind the resource to the formatter and
             start the check.

        When the formatter signals completion the response is finished, the
        resource is pickled to the reserved file (best effort), and the
        transfer volume is logged if it exceeds ``self.config.log_traffic``.
        """
        test_id = None
        path = None
        if self.config.save_dir and os.path.exists(self.config.save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='', dir=self.config.save_dir)
            except (OSError, IOError):
                # Don't try to store it.
                pass
            else:
                # mkstemp() returns an open OS-level descriptor owned by the
                # caller.  The file is reopened by name when the test is
                # saved, so close it now instead of leaking one per request.
                os.close(fd)
                test_id = os.path.split(path)[1]

        top_resource = HttpResource(descend=self.descend)
        # NOTE(review): the timeout is scheduled before the early returns
        # below and is not cancelled in this method -- confirm that
        # timeoutError copes with an already finished response.
        self.timeout = thor.schedule(self.config.max_runtime, self.timeoutError,
                                     top_resource.show_task_map)
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, 'html', self.descend)(
            self.ui_uri, self.config.lang, self.output,
            allow_save=test_id, is_saved=False, test_id=test_id, descend=self.descend)
        content_type = "%s; charset=%s" % (formatter.media_type, self.config.charset)
        if self.check_name:
            display_resource = top_resource.subreqs.get(self.check_name, top_resource)
        else:
            display_resource = top_resource

        referers = [value for hdr, value in self.req_hdrs if hdr.lower() == 'referer']
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in self.config.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            self.response_start(b"403", b"Forbidden", [
                (b"Content-Type", content_type.encode('ascii')),
                (b"Cache-Control", b"max-age=360, must-revalidate")])
            formatter.start_output()
            formatter.error_output(referer_error)
            self.response_done([])
            return

        if not self.robots_precheck(self.test_uri):
            self.response_start(b"502", b"Gateway Error", [
                (b"Content-Type", content_type.encode('ascii')),
                (b"Cache-Control", b"max-age=60, must-revalidate")])
            formatter.start_output()
            formatter.error_output("Forbidden by robots.txt.")
            self.response_done([])
            return

        @thor.events.on(formatter)
        def formatter_done() -> None:
            """Finish the response, save the test and log heavy traffic."""
            self.response_done([])
            if test_id:
                try:
                    # "with" closes the file even if pickling fails.
                    with gzip.open(path, 'w') as tmp_file:
                        pickle.dump(top_resource, tmp_file)
                except (OSError, zlib.error, pickle.PickleError):
                    pass # we don't cry if we can't store it.
            ti = sum((i.transfer_in for i, _ in top_resource.linked),
                     top_resource.transfer_in)
            to = sum((i.transfer_out for i, _ in top_resource.linked),
                     top_resource.transfer_out)
            if ti + to > self.config.log_traffic:
                self.error_log("%iK in %iK out for <%s> (descend %s)" % (
                    ti / 1024, to / 1024, e_url(self.test_uri), str(self.descend)))

        self.response_start(b"200", b"OK", [
            (b"Content-Type", content_type.encode('ascii')),
            (b"Cache-Control", b"max-age=60, must-revalidate")])
        formatter.bind_resource(display_resource)
        top_resource.check()
Beispiel #8
0
    def run_test(self):
        """Test a URI.

        Reserves a uniquely named file in ``save_dir`` (if configured and
        present) whose name becomes the test id, then:

          * refuses with 403 when more than one Referer header is sent or
            the first Referer's host is in ``referer_spam_domains``;
          * refuses with 502 when ``self.robots_precheck`` rejects the URI;
          * otherwise sends 200 and runs an ``HttpResource`` against
            ``self.test_uri``.

        When the resource calls back, the result is rendered, the response
        is finished, the resource is pickled to the reserved file (best
        effort), and the transfer volume is written to stderr if it exceeds
        ``log_traffic``.
        """
        test_id = None
        path = None
        if save_dir and os.path.exists(save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
            except (OSError, IOError):
                # Don't try to store it.
                pass
            else:
                # mkstemp() returns an open OS-level descriptor owned by the
                # caller.  The file is reopened by name in done(), so close
                # it now instead of leaking one descriptor per request.
                os.close(fd)
                test_id = os.path.split(path)[1]

        formatter = find_formatter(self.format, 'html', self.descend)(
            self.base_uri, self.test_uri, self.req_hdrs, lang,
            self.output, allow_save=test_id, is_saved=False,
            check_type=self.check_type, test_id=test_id, descend=self.descend
        )

        referers = []
        for hdr, value in self.req_hdrs:
            if hdr.lower() == 'referer':
                referers.append(value)
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in referer_spam_domains:
            # Message previously read "Referer now allowed." (typo).
            referer_error = "Referer not allowed."
        if referer_error:
            self.response_start(
                "403", "Forbidden", [
                ("Content-Type", "%s; charset=%s" % (
                    formatter.media_type, charset)),
                ("Cache-Control", "max-age=360, must-revalidate")
            ])
            formatter.start_output()
            self.output(error_template % referer_error)
            self.response_done([])
            return

        if not self.robots_precheck(self.test_uri):
            self.response_start(
                "502", "Gateway Error", [
                ("Content-Type", "%s; charset=%s" % (
                    formatter.media_type, charset)),
                ("Cache-Control", "max-age=60, must-revalidate")
            ])
            formatter.start_output()
            self.output(error_template % "Forbidden by robots.txt.")
            self.response_done([])
            return

        self.response_start(
            "200", "OK", [
            ("Content-Type", "%s; charset=%s" % (
                formatter.media_type, charset)),
            ("Cache-Control", "max-age=60, must-revalidate")
        ])

        resource = HttpResource(
            self.test_uri,
            req_hdrs=self.req_hdrs,
            status_cb=formatter.status,
            body_procs=[formatter.feed],
            descend=self.descend
        )
        formatter.start_output()

        def done():
            """Render the test, end the response, save it, log traffic."""
            if self.check_type:
                state = resource.subreqs.get(self.check_type, resource)
            else:
                state = resource
            formatter.set_state(state)
            formatter.finish_output()
            self.response_done([])
            if test_id:
                try:
                    tmp_file = gzip.open(path, 'w')
                    try:
                        pickle.dump(resource, tmp_file)
                    finally:
                        # Close even when pickling fails part-way through.
                        tmp_file.close()
                except (IOError, OSError, zlib.error, pickle.PickleError):
                    pass # we don't cry if we can't store it.
            ti = sum([i.transfer_in for i,t in resource.linked], resource.transfer_in)
            to = sum([i.transfer_out for i,t in resource.linked], resource.transfer_out)
            if ti + to > log_traffic:
                # Terminate the entry with a newline so consecutive log
                # lines do not run together.
                sys.stderr.write("%iK in %iK out for <%s> (descend %s)\n" % (
                    ti / 1024,
                    to / 1024,
                    self.test_uri,
                    str(self.descend)
                ))

        resource.run(done)
Beispiel #9
0
    def run_test(self) -> None:
        """Test a URI.

        Steps, in order; each refusal sends a complete error response and
        stops:
          1. reserve a uniquely named file in the configured ``save_dir``
             (if set and present); its name becomes ``self.test_id``;
          2. create the ``HttpResource`` and schedule ``self.timeoutError``
             after the configured ``max_runtime``;
          3. referer limiting (403);
          4. if a robot time and HMAC were supplied, verify them: a valid,
             unexpired pair continues the test immediately, anything else
             is refused with 403;
          5. per-client and per-origin limits (429);
          6. fetch robots.txt; if robots are allowed the test continues,
             otherwise a 403 page offers a link signed for 60 seconds.
        """
        # Local import: only needed to build the "click here" link below.
        from urllib.parse import quote

        # try to initialise stored test results
        if self.config.get('save_dir', "") and os.path.exists(self.config['save_dir']):
            try:
                fd, self.save_path = tempfile.mkstemp(prefix='', dir=self.config['save_dir'])
            except (OSError, IOError):
                # Don't try to store it.
                self.test_id = None # should already be None, but make sure
            else:
                # mkstemp() returns an open OS-level descriptor owned by the
                # caller; only the name is used here, so close it instead of
                # leaking one descriptor per request.
                os.close(fd)
                self.test_id = os.path.split(self.save_path)[1]

        top_resource = HttpResource(self.config, descend=self.descend)
        # NOTE(review): the timeout is scheduled before the early returns
        # below and is not cancelled in this method -- confirm that
        # timeoutError copes with an already finished response.
        self.timeout = thor.schedule(int(self.config['max_runtime']), self.timeoutError,
                                     top_resource.show_task_map)
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, 'html', self.descend)(
            self.config, self.output, allow_save=self.test_id, is_saved=False,
            test_id=self.test_id, descend=self.descend)

        def refuse(status: bytes, phrase: bytes, cache_control: bytes, message: str) -> None:
            """Send a complete error response rendered by the formatter."""
            self.response_start(status, phrase, [
                (b"Content-Type", formatter.content_type()),
                (b"Cache-Control", cache_control)])
            formatter.start_output()
            formatter.error_output(message)
            self.response_done([])

        def robot_signature(timestamp: str) -> str:
            """Return the hex HMAC of an ASCII timestamp under the robot secret.

            The digest is named explicitly because hmac.new() requires
            ``digestmod`` from Python 3.8 on.  Raises UnicodeEncodeError (a
            ValueError) for a non-ASCII timestamp.
            """
            return hmac.new(self._robot_secret, bytes(timestamp, 'ascii'), 'sha256').hexdigest()

        # referer limiting
        referers = [value for hdr, value in self.req_hdrs if hdr.lower() == 'referer']
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in self.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            refuse(b"403", b"Forbidden", b"max-age=360, must-revalidate", referer_error)
            return

        # robot human check
        if self.robot_time and self.robot_time.isdigit() and self.robot_hmac:
            try:
                valid_till = int(self.robot_time)
                expected = robot_signature(self.robot_time)
            except ValueError:
                # str.isdigit() also accepts non-ASCII digits that int() or
                # the ASCII encoding reject; treat those as a bad key.
                is_valid = False
            else:
                # Constant-time comparison; both sides are turned into bytes
                # because compare_digest() rejects non-ASCII str input.
                # (assumes robot_hmac is a str, like robot_time -- confirm)
                is_valid = hmac.compare_digest(
                    self.robot_hmac.encode('utf-8', 'replace'),
                    expected.encode('ascii')) and valid_till >= thor.time()
            if is_valid:
                self.continue_test(top_resource, formatter)
                return
            refuse(b"403", b"Forbidden", b"max-age=60, must-revalidate", "Naughty.")
            self.error_log("Naughty robot key.")
            # Stop here: falling through would start a second response on
            # an exchange that has already been completed.
            return

        # enforce client limits
        if self.config.getint('limit_client_tests', fallback=0):
            client_id = self.get_client_id()
            if client_id:
                if self._client_counts.get(client_id, 0) > \
                  self.config.getint('limit_client_tests'):
                    refuse(b"429", b"Too Many Requests", b"max-age=60, must-revalidate",
                           "Your client is over limit. Please try later.")
                    self.error_log("client over limit: %s" % client_id.decode('idna'))
                    return
                # presumably _client_counts supplies a default of 0 for new
                # keys (e.g. a defaultdict) -- verify where it is created
                self._client_counts[client_id] += 1

        # enforce origin limits
        if self.config.getint('limit_origin_tests', fallback=0):
            origin = url_to_origin(self.test_uri)
            if origin:
                if self._origin_counts.get(origin, 0) > \
                  self.config.getint('limit_origin_tests'):
                    refuse(b"429", b"Too Many Requests", b"max-age=60, must-revalidate",
                           "Origin is over limit. Please try later.")
                    self.error_log("origin over limit: %s" % origin)
                    return
                # same assumption as for _client_counts above
                self._origin_counts[origin] += 1

        # check robots.txt
        robot_fetcher = RobotFetcher(self.config)
        @thor.events.on(robot_fetcher)
        def robot(results: Tuple[str, bool]) -> None:
            """Continue the test if robots are allowed, else offer a signed link."""
            url, robot_ok = results
            if robot_ok:
                self.continue_test(top_resource, formatter)
                return
            valid_till = str(int(thor.time()) + 60)
            # Percent-encode the tested URI: unencoded, any "&" or "'" in it
            # would break the query string / attribute and let the URI
            # inject markup into the error page.
            refuse(b"403", b"Forbidden", b"no-cache",
                   "This site doesn't allow robots. If you are human, please <a href='?uri=%s&robot_time=%s&robot_hmac=%s'>click here</a>." % (
                       quote(self.test_uri, safe=''), valid_till, robot_signature(valid_till)))

        robot_fetcher.check_robots(HttpRequest.iri_to_uri(self.test_uri))
Beispiel #10
0
    def run_test(self) -> None:
        """Test a URI.

        A uniquely named file is reserved in ``self.config.save_dir`` (when
        configured and present); its name is used as the test id.  The
        request is refused with 403 if it carries more than one Referer
        header or if the first Referer's host is listed in
        ``self.config.referer_spam_domains``, and with 502 if
        ``self.robots_precheck`` rejects the URI.  Otherwise a 200 response
        is started, the resource is bound to the formatter and checked.

        Once the formatter signals completion, the response is finished, the
        resource is pickled into the reserved file (best effort) and the
        transfer volume is logged when it exceeds
        ``self.config.log_traffic``.
        """
        test_id = None
        path = None
        if self.config.save_dir and os.path.exists(self.config.save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='',
                                            dir=self.config.save_dir)
            except (OSError, IOError):
                # Don't try to store it.
                pass
            else:
                # mkstemp() returns an open OS-level descriptor owned by the
                # caller.  The file is reopened by name when the test is
                # saved, so close it now instead of leaking one per request.
                os.close(fd)
                test_id = os.path.split(path)[1]

        top_resource = HttpResource(descend=self.descend)
        # NOTE(review): the timeout is scheduled before the early returns
        # below and is not cancelled in this method -- confirm that
        # timeoutError copes with an already finished response.
        self.timeout = thor.schedule(self.config.max_runtime,
                                     self.timeoutError,
                                     top_resource.show_task_map)
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, 'html',
                                   self.descend)(self.ui_uri,
                                                 self.config.lang,
                                                 self.output,
                                                 allow_save=test_id,
                                                 is_saved=False,
                                                 test_id=test_id,
                                                 descend=self.descend)
        content_type = "%s; charset=%s" % (formatter.media_type,
                                           self.config.charset)
        if self.check_name:
            display_resource = top_resource.subreqs.get(
                self.check_name, top_resource)
        else:
            display_resource = top_resource

        def refuse(status: bytes, phrase: bytes, cache_control: bytes,
                   message: str) -> None:
            """Send a complete error response rendered by the formatter."""
            self.response_start(
                status, phrase,
                [(b"Content-Type", content_type.encode('ascii')),
                 (b"Cache-Control", cache_control)])
            formatter.start_output()
            formatter.error_output(message)
            self.response_done([])

        referers = [
            value for hdr, value in self.req_hdrs if hdr.lower() == 'referer'
        ]
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(
                referers[0]).hostname in self.config.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            refuse(b"403", b"Forbidden", b"max-age=360, must-revalidate",
                   referer_error)
            return

        if not self.robots_precheck(self.test_uri):
            refuse(b"502", b"Gateway Error", b"max-age=60, must-revalidate",
                   "Forbidden by robots.txt.")
            return

        @thor.events.on(formatter)
        def formatter_done() -> None:
            """Finish the response, save the test and log heavy traffic."""
            self.response_done([])
            if test_id:
                try:
                    # "with" closes the file even if pickling fails.
                    with gzip.open(path, 'w') as tmp_file:
                        pickle.dump(top_resource, tmp_file)
                except (OSError, zlib.error, pickle.PickleError):
                    pass  # we don't cry if we can't store it.
            ti = sum((i.transfer_in for i, _ in top_resource.linked),
                     top_resource.transfer_in)
            to = sum((i.transfer_out for i, _ in top_resource.linked),
                     top_resource.transfer_out)
            if ti + to > self.config.log_traffic:
                self.error_log("%iK in %iK out for <%s> (descend %s)" %
                               (ti / 1024, to / 1024, e_url(
                                   self.test_uri), str(self.descend)))

        self.response_start(
            b"200", b"OK",
            [(b"Content-Type", content_type.encode('ascii')),
             (b"Cache-Control", b"max-age=60, must-revalidate")])
        formatter.bind_resource(display_resource)
        top_resource.check()
Beispiel #11
0
def load_saved_test(webui: "RedWebUi") -> None:
    """Load a saved test by test_id and render it through a formatter.

    The save file is looked up in ``webui.config["save_dir"]`` under the
    basename of ``webui.test_id`` (so a path in the id cannot leave the save
    directory), opened as gzip and unpickled.

    Responses written to ``webui.exchange``:

    * 404 when the file cannot be opened (or ``test_id`` is not a path-like
      value, which surfaces as ``TypeError``);
    * 500 when the file opens but its content cannot be unpickled;
    * 200 otherwise, with the body produced by the selected formatter.
    """
    try:
        with gzip.open(
                os.path.join(webui.config["save_dir"],
                             os.path.basename(webui.test_id))) as fd:
            mtime = os.fstat(fd.fileno()).st_mtime
            # NOTE(security): pickle.load executes code embedded in the file.
            # This is only acceptable as long as save_dir is writable solely
            # by this service -- never point it at untrusted content.
            top_resource = pickle.load(fd)
    except (OSError, TypeError):
        webui.exchange.response_start(
            b"404",
            b"Not Found",
            [
                (b"Content-Type",
                 b"text/html; charset=%s" % webui.charset_bytes),
                (b"Cache-Control", b"max-age=600, must-revalidate"),
            ],
        )
        webui.output("I'm sorry, I can't find that saved response.")
        webui.exchange.response_done([])
        return
    except (pickle.PickleError, zlib.error, EOFError, AttributeError,
            ImportError, IndexError):
        # Besides PickleError, unpickling is documented to raise
        # AttributeError / ImportError / IndexError, e.g. when a class that
        # was pickled has since been renamed or removed. Treat all of them as
        # "stored data is unreadable" rather than letting them escape.
        webui.exchange.response_start(
            b"500",
            b"Internal Server Error",
            [
                (b"Content-Type",
                 b"text/html; charset=%s" % webui.charset_bytes),
                (b"Cache-Control", b"max-age=600, must-revalidate"),
            ],
        )
        webui.output("I'm sorry, I had a problem loading that.")
        webui.exchange.response_done([])
        return

    # A file whose mtime lies in the future is treated as already saved, so
    # the formatter must not offer to save it again.
    is_saved = mtime > thor.time()

    if webui.check_name:
        # Show a named sub-request if it exists, else fall back to the top.
        display_resource = top_resource.subreqs.get(webui.check_name,
                                                    top_resource)
    else:
        display_resource = top_resource

    formatter = find_formatter(webui.format, "html", top_resource.descend)(
        webui.config,
        display_resource,
        webui.output,
        allow_save=(not is_saved),
        is_saved=True,
        test_id=webui.test_id,
    )

    webui.exchange.response_start(
        b"200",
        b"OK",
        [
            (b"Content-Type", formatter.content_type()),
            (b"Cache-Control", b"max-age=3600, must-revalidate"),
        ],
    )

    # Finish the HTTP response once the formatter reports it is done.
    @thor.events.on(formatter)
    def formatter_done() -> None:
        webui.exchange.response_done([])

    formatter.bind_resource(display_resource)
Beispiel #12
0
    def run_test(self) -> None:
        """Test a URI.

        Reserves a save file when ``save_dir`` is configured and exists,
        builds the resource and formatter, and then applies these gates in
        order, each of which may answer the request and stop:

        1. referer limiting (403);
        2. the "I am human" token check -- a valid, unexpired token skips the
           remaining gates and runs the test, an invalid one gets a 403;
        3. per-client and per-origin rate limits (429);
        4. a robots.txt check, whose callback either continues the test or
           answers 403 with a short-lived link carrying a fresh token.
        """
        # Function-scope import so this method does not depend on the file's
        # import block already providing quote().
        from urllib.parse import quote

        # hmac.new() requires an explicit digest on Python 3.8+. The token is
        # both issued and verified in this method, so one constant keeps the
        # two sides in agreement.
        robot_digest = "sha256"

        # try to initialise stored test results
        if self.config.get("save_dir", "") and os.path.exists(
                self.config["save_dir"]):
            try:
                fd, self.save_path = tempfile.mkstemp(
                    prefix="", dir=self.config["save_dir"])
            except (OSError, IOError):
                # Don't try to store it.
                self.test_id = None  # should already be None, but make sure
            else:
                # Only the reserved name is needed here; mkstemp leaves the
                # descriptor open and closing it is the caller's job.
                os.close(fd)
                self.test_id = os.path.split(self.save_path)[1]

        top_resource = HttpResource(self.config, descend=self.descend)
        self.timeout = thor.schedule(
            int(self.config["max_runtime"]),
            self.timeoutError,
            top_resource.show_task_map,
        )
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, "html", self.descend)(
            self.config,
            self.output,
            allow_save=self.test_id,
            is_saved=False,
            test_id=self.test_id,
            descend=self.descend,
        )

        # referer limiting
        referers = [
            value for hdr, value in self.req_hdrs if hdr.lower() == "referer"
        ]
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(
                referers[0]).hostname in self.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            self.response_start(
                b"403",
                b"Forbidden",
                [
                    (b"Content-Type", formatter.content_type()),
                    (b"Cache-Control", b"max-age=360, must-revalidate"),
                ],
            )
            formatter.start_output()
            formatter.error_output(referer_error)
            self.response_done([])
            return

        # robot human check
        if self.robot_time and self.robot_time.isdigit() and self.robot_hmac:
            try:
                valid_till = int(self.robot_time)
                expected_hmac = hmac.new(
                    self._robot_secret,
                    bytes(self.robot_time, "ascii"),
                    digestmod=robot_digest,
                ).hexdigest()
            except ValueError:
                # str.isdigit() accepts digits that int() or the ASCII
                # encoding reject; such input is simply a bad token.
                is_valid = False
            else:
                # Constant-time comparison; operands are bytes because the
                # client-supplied value may contain non-ASCII characters.
                is_valid = hmac.compare_digest(
                    self.robot_hmac.encode("utf-8", "replace"),
                    expected_hmac.encode("ascii"),
                )
            if is_valid and valid_till >= thor.time():
                self.continue_test(top_resource, formatter)
                return
            self.response_start(
                b"403",
                b"Forbidden",
                [
                    (b"Content-Type", formatter.content_type()),
                    (b"Cache-Control", b"max-age=60, must-revalidate"),
                ],
            )
            formatter.start_output()
            formatter.error_output("Naughty.")
            self.response_done([])
            self.error_log("Naughty robot key.")
            # The response is complete; falling through would run the
            # remaining checks and start a second response.
            return

        # enforce client limits
        if self.config.getint("limit_client_tests", fallback=0):
            client_id = self.get_client_id()
            if client_id:
                if self._client_counts.get(
                        client_id,
                        0) > self.config.getint("limit_client_tests"):
                    self.response_start(
                        b"429",
                        b"Too Many Requests",
                        [
                            (b"Content-Type", formatter.content_type()),
                            (b"Cache-Control", b"max-age=60, must-revalidate"),
                        ],
                    )
                    formatter.start_output()
                    formatter.error_output(
                        "Your client is over limit. Please try later.")
                    self.response_done([])
                    self.error_log("client over limit: %s" %
                                   client_id.decode("idna"))
                    return
                self._client_counts[client_id] += 1

        # enforce origin limits
        if self.config.getint("limit_origin_tests", fallback=0):
            origin = url_to_origin(self.test_uri)
            if origin:
                if self._origin_counts.get(
                        origin, 0) > self.config.getint("limit_origin_tests"):
                    self.response_start(
                        b"429",
                        b"Too Many Requests",
                        [
                            (b"Content-Type", formatter.content_type()),
                            (b"Cache-Control", b"max-age=60, must-revalidate"),
                        ],
                    )
                    formatter.start_output()
                    formatter.error_output(
                        "Origin is over limit. Please try later.")
                    self.response_done([])
                    self.error_log("origin over limit: %s" % origin)
                    return
                self._origin_counts[origin] += 1

        # check robots.txt
        robot_fetcher = RobotFetcher(self.config)

        @thor.events.on(robot_fetcher)
        def robot(results: Tuple[str, bool]) -> None:
            """Continue the test if robots.txt allows it, else offer a link."""
            url, robot_ok = results
            if robot_ok:
                self.continue_test(top_resource, formatter)
            else:
                # Issue a token that is valid for the next 60 seconds.
                valid_till = str(int(thor.time()) + 60)
                robot_hmac = hmac.new(self._robot_secret,
                                      bytes(valid_till, "ascii"),
                                      digestmod=robot_digest)
                self.response_start(
                    b"403",
                    b"Forbidden",
                    [
                        (b"Content-Type", formatter.content_type()),
                        (b"Cache-Control", b"no-cache"),
                    ],
                )
                formatter.start_output()
                # test_uri is client-supplied: percent-encode it so it can
                # neither break out of the href attribute nor corrupt the
                # query string it is embedded in.
                formatter.error_output(
                    "This site doesn't allow robots. If you are human, please <a href='?uri=%s&robot_time=%s&robot_hmac=%s'>click here</a>."
                    % (quote(self.test_uri, safe=""), valid_till,
                       robot_hmac.hexdigest()))
                self.response_done([])

        robot_fetcher.check_robots(HttpRequest.iri_to_uri(self.test_uri))
Beispiel #13
0
    def run_test(self):
        """Test a URI.

        Reserves a save file when the module-level ``save_dir`` is set and
        exists, rejects requests on referer grounds (403) or when
        ``robots_precheck`` refuses the URI (502), and otherwise starts a 200
        response and runs the resource check. When the check finishes the
        formatter output is completed and, if a save file was reserved, the
        resource is pickled into it on a best-effort basis.
        """
        test_id = None
        if save_dir and os.path.exists(save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix="", dir=save_dir)
            except (OSError, IOError):
                # Don't try to store it.
                test_id = None
            else:
                # Only the reserved name is needed; mkstemp leaves the
                # descriptor open, so release it here instead of leaking it.
                os.close(fd)
                test_id = os.path.split(path)[1]

        formatter = find_formatter(self.format, "html", self.descend)(
            self.base_uri,
            self.test_uri,
            self.req_hdrs,
            lang,
            self.output,
            allow_save=test_id,
            is_saved=False,
            test_id=test_id,
            descend=self.descend,
        )
        content_type = "%s; charset=%s" % (formatter.media_type, charset)

        # referer limiting
        referers = [
            value for hdr, value in self.req_hdrs if hdr.lower() == "referer"
        ]
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            self.response_start(
                "403",
                "Forbidden",
                [
                    ("Content-Type", content_type),
                    ("Cache-Control", "max-age=360, must-revalidate"),
                ],
            )
            formatter.start_output()
            self.output(error_template % referer_error)
            self.response_done([])
            return

        if not self.robots_precheck(self.test_uri):
            self.response_start(
                "502",
                "Gateway Error",
                [
                    ("Content-Type", content_type),
                    ("Cache-Control", "max-age=60, must-revalidate"),
                ],
            )
            formatter.start_output()
            self.output(error_template % "Forbidden by robots.txt.")
            self.response_done([])
            return

        self.response_start(
            "200",
            "OK",
            [
                ("Content-Type", content_type),
                ("Cache-Control", "max-age=60, must-revalidate"),
            ],
        )

        ired = HttpResource(
            self.test_uri,
            req_hdrs=self.req_hdrs,
            status_cb=formatter.status,
            body_procs=[formatter.feed],
            descend=self.descend,
        )
        formatter.start_output()

        def done():
            """Finish the output and store the result if a file was reserved."""
            if self.check_type:
                # TODO: catch errors (state is None when check_type is unknown)
                state = ired.subreqs.get(self.check_type, None)
            else:
                state = ired
            formatter.set_state(state)
            formatter.finish_output()
            self.response_done([])
            if test_id:
                try:
                    # The context manager closes the file even when
                    # pickling fails part-way through.
                    with gzip.open(path, "w") as tmp_file:
                        pickle.dump(ired, tmp_file)
                except (IOError, zlib.error, pickle.PickleError):
                    pass  # we don't cry if we can't store it.

        ired.run(done)
Beispiel #14
0
 def __init__(self, test_id, test_uri, req_hdrs, base_uri,
     format, output_hdrs, output_body, descend=False, save=False):
     """Handle one request; all work happens in the constructor.

     Exactly one of four paths is taken:

     * ``save`` with a ``test_id`` (and a configured ``save_dir``): extend
       the stored file's lifetime by setting its mtime ``save_days`` into
       the future, then redirect (303) to the saved view; 500 on failure.
     * ``test_id`` only: load the stored result and render it; 404 if the
       file cannot be opened, 500 if it cannot be unpickled.
     * ``test_uri`` only: run a new test, render it, and store the result
       on a best-effort basis when a save file could be reserved.
     * neither: render the formatter with no result.

     A timeout is scheduled up front and deleted on every path that writes
     a response through to the end.
     """
     self.output_body = output_body
     self.start = time.time()
     timeout = nbhttp.schedule(max_runtime, self.timeoutError)
     if save and save_dir and test_id:
         # test_id is request data: reduce it to a bare file name so that it
         # cannot address files outside save_dir (the load path below does
         # the same) before it reaches os.utime and the Location header.
         test_id = os.path.basename(test_id)
         try:
             os.utime(
                 os.path.join(save_dir, test_id),
                 (
                     nbhttp.now(),
                     nbhttp.now() + (save_days * 24 * 60 * 60)
                 )
             )
             location = "?id=%s" % test_id
             if descend:
                 location = "%s&descend=True" % location
             output_hdrs("303 See Other", [
                 ("Location", location)
             ])
             output_body("Redirecting...")
         except (OSError, IOError):
             output_hdrs("500 Internal Server Error", [
                 ("Content-Type", "text/html; charset=%s" % charset),
             ])
             # TODO: better error message (through formatter?)
             output_body(error_template % "Sorry, I couldn't save that.")
     elif test_id:
         try:
             test_id = os.path.basename(test_id)
             fd = gzip.open(os.path.join(save_dir, test_id))
             mtime = os.fstat(fd.fileno()).st_mtime
         except (OSError, IOError, zlib.error):
             output_hdrs("404 Not Found", [
                 ("Content-Type", "text/html; charset=%s" % charset),
                 ("Cache-Control", "max-age=600, must-revalidate")
             ])
             # TODO: better error page (through formatter?)
             self.output_body(error_template %
                 "I'm sorry, I can't find that saved response."
             )
             timeout.delete()
             return
         # An mtime in the future marks a result that was explicitly saved.
         is_saved = mtime > nbhttp.now()
         try:
             # NOTE(security): pickle.load runs code embedded in the file;
             # save_dir must only ever contain files written by this service.
             ired = pickle.load(fd)
         except (pickle.PickleError, EOFError):
             output_hdrs("500 Internal Server Error", [
                 ("Content-Type", "text/html; charset=%s" % charset),
                 ("Cache-Control", "max-age=600, must-revalidate")
             ])
             # TODO: better error page (through formatter?)
             self.output_body(error_template %
                 "I'm sorry, I had a problem reading that response."
             )
             timeout.delete()
             return
         finally:
             fd.close()
         formatter = find_formatter(format, 'html', descend)(
             base_uri, ired.uri, ired.orig_req_hdrs, lang, self.output,
             allow_save=(not is_saved), is_saved=True, test_id=test_id
         )
         output_hdrs("200 OK", [
             ("Content-Type", "%s; charset=%s" % (
                 formatter.media_type, charset)),
             ("Cache-Control", "max-age=3600, must-revalidate")
         ])
         formatter.start_output()
         formatter.finish_output(ired)
     elif test_uri:
         if save_dir and os.path.exists(save_dir):
             try:
                 fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
             except (OSError, IOError):
                 # Don't try to store it.
                 test_id = None
             else:
                 # Only the reserved name is needed; close the descriptor
                 # mkstemp opened instead of leaking it.
                 os.close(fd)
                 test_id = os.path.split(path)[1]
         else:
             test_id = None
         formatter = find_formatter(format, 'html', descend)(
             base_uri, test_uri, req_hdrs, lang, self.output,
             allow_save=test_id, is_saved=False, test_id=test_id,
             descend=descend
         )
         output_hdrs("200 OK", [
             ("Content-Type", "%s; charset=%s" % (
                 formatter.media_type, charset)),
             ("Cache-Control", "max-age=60, must-revalidate")
         ])
         formatter.start_output()
         ired = droid.InspectingResourceExpertDroid(
             test_uri,
             req_hdrs=req_hdrs,
             status_cb=formatter.status,
             body_procs=[formatter.feed],
             descend=descend
         )
         formatter.finish_output(ired)
         if test_id:
             try:
                 tmp_file = gzip.open(path, 'w')
                 try:
                     pickle.dump(ired, tmp_file)
                 finally:
                     # Close the file even when pickling fails.
                     tmp_file.close()
             except (IOError, zlib.error, pickle.PickleError):
                 pass # we don't cry if we can't store it.
     else:  # no test_uri
         formatter = html.BaseHtmlFormatter(
             base_uri, test_uri, req_hdrs, lang, self.output)
         output_hdrs("200 OK", [
             ("Content-Type", "%s; charset=%s" % (
                 formatter.media_type, charset)
             ),
             ("Cache-Control", "max-age=300")
         ])
         formatter.start_output()
         formatter.finish_output(None)
     timeout.delete()
Beispiel #15
0
    def __init__(
        self,
        config: SectionProxy,
        method: str,
        query_string: bytes,
        req_headers: RawHeaderListType,
        req_body: bytes,
        exchange: HttpResponseExchange,
        error_log: Callable[[str], int] = sys.stderr.write,
    ) -> None:
        """Parse the request and dispatch it to the matching handler.

        The query string (and, for form-encoded POSTs, the body) is decoded
        with the configured charset. POST requests extend a saved test, run
        the Slack or client-error handlers, start a new test or show the
        default page; GET and HEAD load a saved test, run Slack auth or show
        the default page; any other method gets a 405 response.
        """
        self.config = config  # type: SectionProxy
        charset = self.config["charset"]
        self.query_string = parse_qs(query_string.decode(charset, "replace"))
        self.req_headers = req_headers
        self.req_body = req_body
        self.body_args = {}
        self.exchange = exchange
        self.error_log = error_log  # function to log errors to

        # Pull the individual request parameters out of the query string.
        query = self.query_string
        self.test_uri = query.get("uri", [""])[0]
        self.test_id = query.get("id", [None])[0]
        # Keep only "name:value" entries that have a non-empty name.
        self.req_hdrs = [
            tuple(raw.split(":", 1))  # type: ignore
            for raw in query.get("req_hdr", [])
            if raw.find(":") > 0
        ]  # type: StrHeaderListType
        self.format = query.get("format", ["html"])[0]
        self.descend = "descend" in query
        # check_name is only honoured when not descending.
        self.check_name = (
            None if self.descend else query.get("check_name", [None])[0]
        )  # type: str

        self.charset_bytes = charset.encode("ascii")

        self.save_path = None  # type: str
        self.timeout = None  # type: Any

        self.start = time.time()

        if method == "POST":
            content_types = get_header(self.req_headers, b"content-type")
            if (content_types and content_types[-1].lower()
                    == b"application/x-www-form-urlencoded"):
                self.body_args = parse_qs(req_body.decode(charset, "replace"))

            wants_save = ("save" in query
                          and self.config.get("save_dir", "")
                          and self.test_id)
            if wants_save:
                extend_saved_test(self)
            elif "slack" in query:
                slack_run(self)
            elif "client_error" in query:
                self.dump_client_error()
            elif self.test_uri:
                self.run_test()
            else:
                self.show_default()
        elif method in ("GET", "HEAD"):
            if self.test_id:
                load_saved_test(self)
            elif "code" in query:
                slack_auth(self)
            else:
                self.show_default()
        else:
            html_formatter = find_formatter("html")(self.config, None,
                                                    self.output)
            self.error_response(
                html_formatter,
                b"405",
                b"Method Not Allowed",
                "Method Not Allowed",
            )
Beispiel #16
0
    def run_test(self) -> None:
        """Test a URI.

        Sets up the save file, resource, formatter and timeout, then applies
        referer limiting and rate limiting. If both pass, the test is either
        handed to the captcha handler (when hCaptcha is configured) or
        continued directly.
        """
        self.test_id = init_save_file(self)

        resource = HttpResource(self.config, descend=self.descend)
        resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)

        formatter_class = find_formatter(self.format, "html", self.descend)
        formatter = formatter_class(
            self.config,
            resource,
            self.output,
            allow_save=self.test_id,
            is_saved=False,
            test_id=self.test_id,
            descend=self.descend,
        )

        # Pre-bind the arguments every outcome below needs.
        continue_test = partial(self.continue_test, resource, formatter)
        error_response = partial(self.error_response, formatter)

        self.timeout = thor.schedule(
            int(self.config["max_runtime"]),
            self.timeoutError,
            resource.show_task_map,
        )

        # Referer limiting: at most one Referer, and not from a listed domain.
        referers = [
            value for name, value in self.req_hdrs
            if name.lower() == "referer"
        ]
        spam_domains = self.config.get("referer_spam_domains",
                                       fallback="").split()

        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if (spam_domains and referers
                and urlsplit(referers[0]).hostname in spam_domains):
            referer_error = "Referer not allowed."

        if referer_error:
            error_response(b"403", b"Forbidden", referer_error)
            return

        # Rate limiting: ValueError signals that the limiter refused the test.
        try:
            ratelimiter.process(self, error_response)
        except ValueError:
            return

        # Without both hCaptcha settings there is nothing to verify.
        captcha_configured = (self.config.get("hcaptcha_sitekey", "")
                              and self.config.get("hcaptcha_secret", ""))
        if not captcha_configured:
            continue_test()
            return

        CaptchaHandler(
            self,
            self.get_client_id(),
            continue_test,
            error_response,
        ).run()