Example #1
0
 def show_default(self) -> None:
     """Render the default page as a cacheable 200 response.

     If a test URI is set, a resource is prepared for it (the request is
     set but no check is started here) and attached to the formatter;
     when ``check_name`` is set, the matching sub-request is attached
     instead, falling back to the top-level resource if there is none.
     """
     page = html.BaseHtmlFormatter(
         self.config, None, self.output, is_blank=(self.test_uri == "")
     )
     if self.test_uri:
         resource = HttpResource(self.config, descend=self.descend)
         resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
         page.resource = (
             resource.subreqs.get(self.check_name, resource)
             if self.check_name
             else resource
         )
     self.exchange.response_start(
         b"200",
         b"OK",
         [
             (b"Content-Type", page.content_type()),
             (b"Cache-Control", b"max-age=300"),
         ],
     )
     # Nothing is streamed between the page header and footer here.
     page.start_output()
     page.finish_output()
     self.exchange.response_done([])
Example #2
0
File: webui.py Project: mnot/redbot
    def continue_test(self, top_resource: HttpResource, formatter: Formatter) -> None:
        """Preliminary checks are done; actually run the test.

        Starts the 200 response, binds the resource to display to the
        formatter and kicks off ``top_resource.check()``.  A
        ``formatter_done`` handler is attached to the formatter with
        ``thor.events.on``; it finishes the response, stores the result when
        ``self.test_id`` is set, and logs unusually heavy traffic.
        """
        @thor.events.on(formatter)
        def formatter_done() -> None:
            self.response_done([])
            if self.test_id:
                try:
                    # The context manager closes the file even when pickling
                    # fails part-way, so no handle is leaked.
                    with gzip.open(self.save_path, 'w') as tmp_file:
                        pickle.dump(top_resource, tmp_file)
                except (IOError, zlib.error, pickle.PickleError):
                    pass # we don't cry if we can't store it.

            # log excessive traffic: totals cover the top resource plus
            # everything it linked to.
            ti = sum([i.transfer_in for i, _ in top_resource.linked],
                     top_resource.transfer_in)
            to = sum([i.transfer_out for i, _ in top_resource.linked],
                     top_resource.transfer_out)
            if ti + to > int(self.config['log_traffic']) * 1024:
                self.error_log("%iK in %iK out for <%s> (descend %s)" % (
                    ti / 1024, to / 1024, e_url(self.test_uri), str(self.descend)))

        self.response_start(b"200", b"OK", [
            (b"Content-Type", formatter.content_type()),
            (b"Cache-Control", b"max-age=60, must-revalidate")])
        # Show the named sub-request when one was asked for and exists;
        # otherwise show the top-level resource.
        if self.check_name:
            display_resource = top_resource.subreqs.get(self.check_name, top_resource)
        else:
            display_resource = top_resource
        formatter.bind_resource(display_resource)
        top_resource.check()
Example #3
0
    def run_test(self):
        """Test a URI.

        Reserves a save file when ``save_dir`` exists, starts a 200 response,
        then runs an ``HttpResource`` for ``self.test_uri``.  When the run
        calls back, the result is handed to the formatter, the response is
        finished, and the resource is stored (best effort) as a gzipped
        pickle under the reserved path.
        """
        test_id = None
        path = None
        if save_dir and os.path.exists(save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
            except (OSError, IOError):
                # Don't try to store it.
                pass
            else:
                # mkstemp() returns an already-open descriptor; only the path
                # is used later, so close it now instead of leaking it.
                os.close(fd)
                test_id = os.path.split(path)[1]

        formatter = find_formatter(self.format, 'html', self.descend)(
            self.base_uri, self.test_uri, self.req_hdrs, lang,
            self.output, allow_save=test_id, is_saved=False,
            test_id=test_id, descend=self.descend
        )

        self.response_start(
            "200", "OK", [
                ("Content-Type", "%s; charset=%s" % (
                    formatter.media_type, charset)),
                ("Cache-Control", "max-age=60, must-revalidate")
            ])

        ired = HttpResource(
            self.test_uri,
            req_hdrs=self.req_hdrs,
            status_cb=formatter.status,
            body_procs=[formatter.feed],
            descend=self.descend
        )
        formatter.start_output()

        def done():
            """Finish the output and response, then try to save the result."""
            if self.check_type:
                # Fall back to the main resource when the requested check
                # does not exist, rather than handing None to the formatter.
                state = ired.subreqs.get(self.check_type, ired)
            else:
                state = ired
            formatter.set_state(state)
            formatter.finish_output()
            self.response_done([])
            if test_id:
                try:
                    # 'with' closes the file even if pickling fails.
                    with gzip.open(path, 'w') as tmp_file:
                        pickle.dump(ired, tmp_file)
                except (IOError, zlib.error, pickle.PickleError):
                    pass # we don't cry if we can't store it.
        ired.run(done)
Example #4
0
    def continue_test(
        self,
        top_resource: HttpResource,
        formatter: Formatter,
        extra_headers: RawHeaderListType = [],
    ) -> None:
        """Run the actual test now that the preliminary checks have passed.

        Sends the 200 response head (the standard headers followed by
        ``extra_headers``), binds the resource to display to the formatter
        and starts ``top_resource.check()``.

        NOTE: the ``extra_headers`` default is a shared list object; it is
        only read here (concatenated), never mutated.
        """

        @thor.events.on(formatter)
        def formatter_done() -> None:
            # Cancel the pending timeout, if there is one.
            if self.timeout:
                self.timeout.delete()
                self.timeout = None
            self.exchange.response_done([])
            save_test(self, top_resource)

            # log excessive traffic (top resource plus everything linked)
            received = sum(
                (linked.transfer_in for linked, _ in top_resource.linked),
                top_resource.transfer_in,
            )
            sent = sum(
                (linked.transfer_out for linked, _ in top_resource.linked),
                top_resource.transfer_out,
            )
            threshold = int(self.config["log_traffic"]) * 1024
            if received + sent > threshold:
                self.error_log(
                    f"{received / 1024:n}K in {sent / 1024:n}K out "
                    f"for <{e_url(self.test_uri)}> (descend {self.descend})"
                )

        response_headers = [
            (b"Content-Type", formatter.content_type()),
            (b"Cache-Control", b"max-age=60, must-revalidate"),
        ] + extra_headers
        self.exchange.response_start(b"200", b"OK", response_headers)

        # Prefer the named sub-request; fall back to the top-level resource.
        display_resource = (
            top_resource.subreqs.get(self.check_name, top_resource)
            if self.check_name
            else top_resource
        )
        formatter.bind_resource(display_resource)
        top_resource.check()
Example #5
0
    def continue_test(self, top_resource: HttpResource,
                      formatter: Formatter) -> None:
        """Preliminary checks are done; actually run the test.

        Starts the 200 response, binds the resource to display to the
        formatter and kicks off ``top_resource.check()``.  A
        ``formatter_done`` handler is attached to the formatter with
        ``thor.events.on``; it finishes the response, stores the result when
        ``self.test_id`` is set, and logs unusually heavy traffic.
        """

        @thor.events.on(formatter)
        def formatter_done() -> None:
            self.response_done([])
            if self.test_id:
                try:
                    # The context manager closes the file even when pickling
                    # fails part-way, so no handle is leaked.
                    with gzip.open(self.save_path, "w") as tmp_file:
                        pickle.dump(top_resource, tmp_file)
                except (IOError, zlib.error, pickle.PickleError):
                    pass  # we don't cry if we can't store it.

            # log excessive traffic: totals cover the top resource plus
            # everything it linked to.
            ti = sum(
                [i.transfer_in for i, _ in top_resource.linked],
                top_resource.transfer_in,
            )
            to = sum(
                [i.transfer_out for i, _ in top_resource.linked],
                top_resource.transfer_out,
            )
            if ti + to > int(self.config["log_traffic"]) * 1024:
                self.error_log("%iK in %iK out for <%s> (descend %s)" %
                               (ti / 1024, to / 1024, e_url(
                                   self.test_uri), str(self.descend)))

        self.response_start(
            b"200",
            b"OK",
            [
                (b"Content-Type", formatter.content_type()),
                (b"Cache-Control", b"max-age=60, must-revalidate"),
            ],
        )
        # Show the named sub-request when one was asked for and exists;
        # otherwise show the top-level resource.
        if self.check_name:
            display_resource = top_resource.subreqs.get(
                self.check_name, top_resource)
        else:
            display_resource = top_resource
        formatter.bind_resource(display_resource)
        top_resource.check()
Example #6
0
    def run_redbot(self, url, method, body, headers):
        """Run a resource check for *url* on a helper thread and return it.

        The calling thread blocks in ``self.wait()`` until the helper
        schedules ``self.stop`` on ``self.io_loop``, then joins the helper
        before returning the ``HttpResource``.
        """
        resource = HttpResource(
            url, method=method, req_body=body, req_hdrs=headers
        )

        def drive_check():
            # thor.stop is handed to run() as its callback; thor.run() then
            # blocks this thread until the loop is stopped.
            resource.run(thor.stop)
            thor.run()
            # Wake up the thread blocked in self.wait().
            self.io_loop.add_callback(self.stop)

        worker = threading.Thread(target=drive_check)
        worker.start()
        self.wait()
        worker.join()
        return resource
Example #7
0
def slack_run(webui: "RedWebUi") -> None:
    """Handle a request coming from Slack.

    Immediately answers with a 200 and an ephemeral "checking" message,
    then runs the check for the URI found in the ``text`` form field.  The
    formatter is given the ``response_url`` form field as ``slack_uri``.

    NOTE(review): the 200 acknowledgement is sent before the Slack secret
    is verified, so the 403 error path below runs after ``response_done``
    -- confirm this ordering is intended.
    """
    response_url = webui.body_args.get("response_url", [""])[0].strip()
    formatter = slack.SlackFormatter(
        webui.config, None, webui.output, slack_uri=response_url
    )
    webui.test_uri = webui.body_args.get("text", [""])[0].strip()

    # Acknowledge the request right away.
    ack_headers = [
        (b"Content-Type", formatter.content_type()),
        (b"Cache-Control", b"max-age=300"),
    ]
    webui.exchange.response_start(b"200", b"OK", ack_headers)
    acknowledgement = {
        "response_type": "ephemeral",
        "text": f"_Checking_ {webui.test_uri} _..._",
    }
    webui.output(json.dumps(acknowledgement))
    webui.exchange.response_done([])

    top_resource = HttpResource(webui.config)
    top_resource.set_request(webui.test_uri, req_hdrs=webui.req_hdrs)
    formatter.bind_resource(top_resource)

    if not verify_slack_secret(webui):
        webui.error_response(
            formatter,
            b"403",
            b"Forbidden",
            "Incorrect Slack Authentication.",
            "Bad slack token.",
        )
        return

    # Bound the run time; formatter.timeout is the scheduled callback.
    max_runtime = int(webui.config["max_runtime"])
    webui.timeout = thor.schedule(max_runtime, formatter.timeout)

    @thor.events.on(formatter)
    def formatter_done() -> None:
        # Cancel the pending timeout, if any, then store the result.
        if webui.timeout:
            webui.timeout.delete()
            webui.timeout = None
        save_test(webui, top_resource)

    top_resource.check()
0
    def run_test(self) -> None:
        """Test a URI.

        Reserves a save file when ``config.save_dir`` exists, schedules the
        run-time timeout, and builds the resource and formatter.  Requests
        with multiple or spam-listed referers get a 403, and a failed
        ``robots_precheck`` gets a 502.  Otherwise the 200 response is
        started and the check begins; a ``formatter_done`` handler finishes
        the response, stores the result and logs heavy traffic.
        """
        test_id = None
        path = None
        if self.config.save_dir and os.path.exists(self.config.save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='', dir=self.config.save_dir)
            except (OSError, IOError):
                # Don't try to store it.
                pass
            else:
                # mkstemp() returns an already-open descriptor; only the path
                # is used later, so close it now instead of leaking it.
                os.close(fd)
                test_id = os.path.split(path)[1]

        top_resource = HttpResource(descend=self.descend)
        self.timeout = thor.schedule(self.config.max_runtime, self.timeoutError,
                                     top_resource.show_task_map)
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, 'html', self.descend)(
            self.ui_uri, self.config.lang, self.output,
            allow_save=test_id, is_saved=False, test_id=test_id, descend=self.descend)
        content_type = "%s; charset=%s" % (formatter.media_type, self.config.charset)
        if self.check_name:
            display_resource = top_resource.subreqs.get(self.check_name, top_resource)
        else:
            display_resource = top_resource

        # referer limiting
        referers = [value for hdr, value in self.req_hdrs if hdr.lower() == 'referer']
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in self.config.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            self.response_start(b"403", b"Forbidden", [
                (b"Content-Type", content_type.encode('ascii')),
                (b"Cache-Control", b"max-age=360, must-revalidate")])
            formatter.start_output()
            formatter.error_output(referer_error)
            self.response_done([])
            return

        if not self.robots_precheck(self.test_uri):
            self.response_start(b"502", b"Gateway Error", [
                (b"Content-Type", content_type.encode('ascii')),
                (b"Cache-Control", b"max-age=60, must-revalidate")])
            formatter.start_output()
            formatter.error_output("Forbidden by robots.txt.")
            self.response_done([])
            return

        @thor.events.on(formatter)
        def formatter_done() -> None:
            self.response_done([])
            if test_id:
                try:
                    # 'with' closes the file even if pickling fails.
                    with gzip.open(path, 'w') as tmp_file:
                        pickle.dump(top_resource, tmp_file)
                except (IOError, zlib.error, pickle.PickleError):
                    pass # we don't cry if we can't store it.
            # log excessive traffic (top resource plus everything linked)
            ti = sum([i.transfer_in for i, _ in top_resource.linked], top_resource.transfer_in)
            to = sum([i.transfer_out for i, _ in top_resource.linked], top_resource.transfer_out)
            if ti + to > self.config.log_traffic:
                self.error_log("%iK in %iK out for <%s> (descend %s)" % (
                    ti / 1024, to / 1024, e_url(self.test_uri), str(self.descend)))

        self.response_start(b"200", b"OK", [
            (b"Content-Type", content_type.encode('ascii')),
            (b"Cache-Control", b"max-age=60, must-revalidate")])
        formatter.bind_resource(display_resource)
        top_resource.check()
Example #9
0
    def run_test(self):
        """Test a URI.

        Reserves a save file when ``save_dir`` exists and builds the
        formatter.  Requests with multiple or spam-listed referers get a
        403, and a failed ``robots_precheck`` gets a 502.  Otherwise a 200
        response is started and an ``HttpResource`` is run; when it calls
        back, the output is finished, the result is stored (best effort)
        and heavy traffic is reported on stderr.
        """
        test_id = None
        path = None
        if save_dir and os.path.exists(save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
            except (OSError, IOError):
                # Don't try to store it.
                pass
            else:
                # mkstemp() returns an already-open descriptor; only the path
                # is used later, so close it now instead of leaking it.
                os.close(fd)
                test_id = os.path.split(path)[1]

        formatter = find_formatter(self.format, 'html', self.descend)(
            self.base_uri, self.test_uri, self.req_hdrs, lang,
            self.output, allow_save=test_id, is_saved=False,
            check_type=self.check_type, test_id=test_id, descend=self.descend
        )
        content_type = "%s; charset=%s" % (formatter.media_type, charset)

        # referer limiting
        referers = [value for hdr, value in self.req_hdrs if hdr.lower() == 'referer']
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in referer_spam_domains:
            # Message fixed: it previously read "Referer now allowed."
            referer_error = "Referer not allowed."
        if referer_error:
            self.response_start(
                "403", "Forbidden", [
                    ("Content-Type", content_type),
                    ("Cache-Control", "max-age=360, must-revalidate")
                ])
            formatter.start_output()
            self.output(error_template % referer_error)
            self.response_done([])
            return

        if not self.robots_precheck(self.test_uri):
            self.response_start(
                "502", "Gateway Error", [
                    ("Content-Type", content_type),
                    ("Cache-Control", "max-age=60, must-revalidate")
                ])
            formatter.start_output()
            self.output(error_template % "Forbidden by robots.txt.")
            self.response_done([])
            return

        self.response_start(
            "200", "OK", [
                ("Content-Type", content_type),
                ("Cache-Control", "max-age=60, must-revalidate")
            ])

        resource = HttpResource(
            self.test_uri,
            req_hdrs=self.req_hdrs,
            status_cb=formatter.status,
            body_procs=[formatter.feed],
            descend=self.descend
        )
        formatter.start_output()

        def done():
            """Finish the output and response, save the result, log traffic."""
            if self.check_type:
                state = resource.subreqs.get(self.check_type, resource)
            else:
                state = resource
            formatter.set_state(state)
            formatter.finish_output()
            self.response_done([])
            if test_id:
                try:
                    # 'with' closes the file even if pickling fails.
                    with gzip.open(path, 'w') as tmp_file:
                        pickle.dump(resource, tmp_file)
                except (IOError, zlib.error, pickle.PickleError):
                    pass # we don't cry if we can't store it.
            # log excessive traffic (main resource plus everything linked)
            ti = sum([i.transfer_in for i, _ in resource.linked], resource.transfer_in)
            to = sum([i.transfer_out for i, _ in resource.linked], resource.transfer_out)
            if ti + to > log_traffic:
                sys.stderr.write("%iK in %iK out for <%s> (descend %s)" % (
                    ti / 1024,
                    to / 1024,
                    self.test_uri,
                    str(self.descend)
                ))

        resource.run(done)
Example #10
0
File: webui.py Project: mnot/redbot
    def run_test(self) -> None:
        """Test a URI.

        Applies the pre-flight gates in order: referer limiting, the signed
        "I am human" robot override, per-client limits, per-origin limits,
        and finally a robots.txt fetch.  The test itself is started through
        ``continue_test`` by the override or by a positive robots.txt
        result.  Every rejection sends one complete error response and
        stops.
        """
        # Function-scope import so this method does not depend on the
        # module's import block.
        from urllib.parse import quote

        # try to initialise stored test results
        if self.config.get('save_dir', "") and os.path.exists(self.config['save_dir']):
            try:
                fd, self.save_path = tempfile.mkstemp(prefix='', dir=self.config['save_dir'])
            except (OSError, IOError):
                # Don't try to store it.
                self.test_id = None # should already be None, but make sure
            else:
                # mkstemp() returns an already-open descriptor; only the path
                # is used later, so close it now instead of leaking it.
                os.close(fd)
                self.test_id = os.path.split(self.save_path)[1]

        top_resource = HttpResource(self.config, descend=self.descend)
        self.timeout = thor.schedule(int(self.config['max_runtime']), self.timeoutError,
                                     top_resource.show_task_map)
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, 'html', self.descend)(
            self.config, self.output, allow_save=self.test_id, is_saved=False,
            test_id=self.test_id, descend=self.descend)

        def reject(status: bytes, phrase: bytes, message: str,
                   cache_control: bytes = b"max-age=60, must-revalidate") -> None:
            """Send one complete error response rendered by the formatter."""
            self.response_start(status, phrase, [
                (b"Content-Type", formatter.content_type()),
                (b"Cache-Control", cache_control)])
            formatter.start_output()
            formatter.error_output(message)
            self.response_done([])

        # referer limiting
        referers = [value for hdr, value in self.req_hdrs if hdr.lower() == 'referer']
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in self.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            reject(b"403", b"Forbidden", referer_error, b"max-age=360, must-revalidate")
            return

        # robot human check
        if self.robot_time and self.robot_time.isdigit() and self.robot_hmac:
            valid_till = int(self.robot_time)
            # digestmod is mandatory on Python 3.8+; the same digest is used
            # where the link is generated below.
            computed_hmac = hmac.new(
                self._robot_secret, bytes(self.robot_time, 'ascii'), 'sha256')
            # Constant-time comparison; encoding both sides to bytes also
            # copes with non-ASCII input.
            is_valid = hmac.compare_digest(
                self.robot_hmac.encode('utf-8'),
                computed_hmac.hexdigest().encode('ascii'))
            if is_valid and valid_till >= thor.time():
                self.continue_test(top_resource, formatter)
                return
            reject(b"403", b"Forbidden", "Naughty.")
            self.error_log("Naughty robot key.")
            # Stop here: falling through would start a second response.
            return

        # enforce client limits
        client_limit = self.config.getint('limit_client_tests', fallback=0)
        if client_limit:
            client_id = self.get_client_id()
            if client_id:
                if self._client_counts.get(client_id, 0) > client_limit:
                    reject(b"429", b"Too Many Requests",
                           "Your client is over limit. Please try later.")
                    self.error_log("client over limit: %s" % client_id.decode('idna'))
                    return
                # Works whether or not the counter already has this key.
                self._client_counts[client_id] = self._client_counts.get(client_id, 0) + 1

        # enforce origin limits
        origin_limit = self.config.getint('limit_origin_tests', fallback=0)
        if origin_limit:
            origin = url_to_origin(self.test_uri)
            if origin:
                if self._origin_counts.get(origin, 0) > origin_limit:
                    reject(b"429", b"Too Many Requests",
                           "Origin is over limit. Please try later.")
                    self.error_log("origin over limit: %s" % origin)
                    return
                self._origin_counts[origin] = self._origin_counts.get(origin, 0) + 1

        # check robots.txt
        robot_fetcher = RobotFetcher(self.config)
        @thor.events.on(robot_fetcher)
        def robot(results: Tuple[str, bool]) -> None:
            _, robot_ok = results
            if robot_ok:
                self.continue_test(top_resource, formatter)
            else:
                # Offer a link, signed and valid for 60 seconds, that lets a
                # human override the robots.txt refusal.
                valid_till = str(int(thor.time()) + 60)
                robot_hmac = hmac.new(
                    self._robot_secret, bytes(valid_till, 'ascii'), 'sha256')
                # test_uri is user input: percent-encode it so it can neither
                # break out of the href attribute nor split the query string.
                reject(
                    b"403", b"Forbidden",
                    "This site doesn't allow robots. If you are human, please <a href='?uri=%s&robot_time=%s&robot_hmac=%s'>click here</a>." % (
                        quote(self.test_uri, safe=''), valid_till, robot_hmac.hexdigest()),
                    b"no-cache")

        robot_fetcher.check_robots(HttpRequest.iri_to_uri(self.test_uri))
Example #11
0
    def run_test(self) -> None:
        """Test a URI.

        Reserves a save file when ``config.save_dir`` exists, schedules the
        run-time timeout, and builds the resource and formatter.  Requests
        with multiple or spam-listed referers get a 403, and a failed
        ``robots_precheck`` gets a 502.  Otherwise the 200 response is
        started and the check begins; a ``formatter_done`` handler finishes
        the response, stores the result and logs heavy traffic.
        """
        test_id = None
        path = None
        if self.config.save_dir and os.path.exists(self.config.save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='',
                                            dir=self.config.save_dir)
            except (OSError, IOError):
                # Don't try to store it.
                pass
            else:
                # mkstemp() returns an already-open descriptor; only the path
                # is used later, so close it now instead of leaking it.
                os.close(fd)
                test_id = os.path.split(path)[1]

        top_resource = HttpResource(descend=self.descend)
        self.timeout = thor.schedule(self.config.max_runtime,
                                     self.timeoutError,
                                     top_resource.show_task_map)
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, 'html',
                                   self.descend)(self.ui_uri,
                                                 self.config.lang,
                                                 self.output,
                                                 allow_save=test_id,
                                                 is_saved=False,
                                                 test_id=test_id,
                                                 descend=self.descend)
        content_type = "%s; charset=%s" % (formatter.media_type,
                                           self.config.charset)
        if self.check_name:
            display_resource = top_resource.subreqs.get(
                self.check_name, top_resource)
        else:
            display_resource = top_resource

        # referer limiting
        referers = [
            value for hdr, value in self.req_hdrs if hdr.lower() == 'referer'
        ]
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(
                referers[0]).hostname in self.config.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            self.response_start(
                b"403", b"Forbidden",
                [(b"Content-Type", content_type.encode('ascii')),
                 (b"Cache-Control", b"max-age=360, must-revalidate")])
            formatter.start_output()
            formatter.error_output(referer_error)
            self.response_done([])
            return

        if not self.robots_precheck(self.test_uri):
            self.response_start(
                b"502", b"Gateway Error",
                [(b"Content-Type", content_type.encode('ascii')),
                 (b"Cache-Control", b"max-age=60, must-revalidate")])
            formatter.start_output()
            formatter.error_output("Forbidden by robots.txt.")
            self.response_done([])
            return

        @thor.events.on(formatter)
        def formatter_done() -> None:
            self.response_done([])
            if test_id:
                try:
                    # 'with' closes the file even if pickling fails.
                    with gzip.open(path, 'w') as tmp_file:
                        pickle.dump(top_resource, tmp_file)
                except (IOError, zlib.error, pickle.PickleError):
                    pass  # we don't cry if we can't store it.
            # log excessive traffic (top resource plus everything linked)
            ti = sum([i.transfer_in for i, _ in top_resource.linked],
                     top_resource.transfer_in)
            to = sum([i.transfer_out for i, _ in top_resource.linked],
                     top_resource.transfer_out)
            if ti + to > self.config.log_traffic:
                self.error_log("%iK in %iK out for <%s> (descend %s)" %
                               (ti / 1024, to / 1024, e_url(
                                   self.test_uri), str(self.descend)))

        self.response_start(
            b"200", b"OK",
            [(b"Content-Type", content_type.encode('ascii')),
             (b"Cache-Control", b"max-age=60, must-revalidate")])
        formatter.bind_resource(display_resource)
        top_resource.check()
Example #12
0
    def run_test(self) -> None:
        """Test a URI.

        Applies the pre-flight gates in order: referer limiting, the signed
        "I am human" robot override, per-client limits, per-origin limits,
        and finally a robots.txt fetch.  The test itself is started through
        ``continue_test`` by the override or by a positive robots.txt
        result.  Every rejection sends one complete error response and
        stops.
        """
        # Function-scope import so this method does not depend on the
        # module's import block.
        from urllib.parse import quote

        # try to initialise stored test results
        if self.config.get("save_dir", "") and os.path.exists(
                self.config["save_dir"]):
            try:
                fd, self.save_path = tempfile.mkstemp(
                    prefix="", dir=self.config["save_dir"])
            except (OSError, IOError):
                # Don't try to store it.
                self.test_id = None  # should already be None, but make sure
            else:
                # mkstemp() returns an already-open descriptor; only the path
                # is used later, so close it now instead of leaking it.
                os.close(fd)
                self.test_id = os.path.split(self.save_path)[1]

        top_resource = HttpResource(self.config, descend=self.descend)
        self.timeout = thor.schedule(
            int(self.config["max_runtime"]),
            self.timeoutError,
            top_resource.show_task_map,
        )
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, "html", self.descend)(
            self.config,
            self.output,
            allow_save=self.test_id,
            is_saved=False,
            test_id=self.test_id,
            descend=self.descend,
        )

        def reject(
            status: bytes,
            phrase: bytes,
            message: str,
            cache_control: bytes = b"max-age=60, must-revalidate",
        ) -> None:
            """Send one complete error response rendered by the formatter."""
            self.response_start(
                status,
                phrase,
                [
                    (b"Content-Type", formatter.content_type()),
                    (b"Cache-Control", cache_control),
                ],
            )
            formatter.start_output()
            formatter.error_output(message)
            self.response_done([])

        # referer limiting
        referers = [
            value for hdr, value in self.req_hdrs if hdr.lower() == "referer"
        ]
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(
                referers[0]).hostname in self.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            reject(b"403", b"Forbidden", referer_error,
                   b"max-age=360, must-revalidate")
            return

        # robot human check
        if self.robot_time and self.robot_time.isdigit() and self.robot_hmac:
            valid_till = int(self.robot_time)
            # digestmod is mandatory on Python 3.8+; the same digest is used
            # where the link is generated below.
            computed_hmac = hmac.new(self._robot_secret,
                                     bytes(self.robot_time, "ascii"),
                                     "sha256")
            # Constant-time comparison; encoding both sides to bytes also
            # copes with non-ASCII input.
            is_valid = hmac.compare_digest(
                self.robot_hmac.encode("utf-8"),
                computed_hmac.hexdigest().encode("ascii"),
            )
            if is_valid and valid_till >= thor.time():
                self.continue_test(top_resource, formatter)
                return
            reject(b"403", b"Forbidden", "Naughty.")
            self.error_log("Naughty robot key.")
            # Stop here: falling through would start a second response.
            return

        # enforce client limits
        client_limit = self.config.getint("limit_client_tests", fallback=0)
        if client_limit:
            client_id = self.get_client_id()
            if client_id:
                if self._client_counts.get(client_id, 0) > client_limit:
                    reject(
                        b"429",
                        b"Too Many Requests",
                        "Your client is over limit. Please try later.",
                    )
                    self.error_log("client over limit: %s" %
                                   client_id.decode("idna"))
                    return
                # Works whether or not the counter already has this key.
                self._client_counts[client_id] = (
                    self._client_counts.get(client_id, 0) + 1)

        # enforce origin limits
        origin_limit = self.config.getint("limit_origin_tests", fallback=0)
        if origin_limit:
            origin = url_to_origin(self.test_uri)
            if origin:
                if self._origin_counts.get(origin, 0) > origin_limit:
                    reject(
                        b"429",
                        b"Too Many Requests",
                        "Origin is over limit. Please try later.",
                    )
                    self.error_log("origin over limit: %s" % origin)
                    return
                self._origin_counts[origin] = (
                    self._origin_counts.get(origin, 0) + 1)

        # check robots.txt
        robot_fetcher = RobotFetcher(self.config)

        @thor.events.on(robot_fetcher)
        def robot(results: Tuple[str, bool]) -> None:
            _, robot_ok = results
            if robot_ok:
                self.continue_test(top_resource, formatter)
            else:
                # Offer a link, signed and valid for 60 seconds, that lets a
                # human override the robots.txt refusal.
                valid_till = str(int(thor.time()) + 60)
                robot_hmac = hmac.new(self._robot_secret,
                                      bytes(valid_till, "ascii"), "sha256")
                # test_uri is user input: percent-encode it so it can neither
                # break out of the href attribute nor split the query string.
                reject(
                    b"403",
                    b"Forbidden",
                    "This site doesn't allow robots. If you are human, please <a href='?uri=%s&robot_time=%s&robot_hmac=%s'>click here</a>."
                    % (quote(self.test_uri, safe=""), valid_till,
                       robot_hmac.hexdigest()),
                    b"no-cache",
                )

        robot_fetcher.check_robots(HttpRequest.iri_to_uri(self.test_uri))
Example #13
0
    def run_test(self):
        """Test a URI and stream the formatted result to the client.

        Steps, in order:

        1. Reserve a file in ``save_dir`` (when it is configured and exists)
           whose name becomes the test id used for saving the result.
        2. Reject the request with a 403 if it carries more than one Referer
           header, or if the first Referer's hostname is listed in
           ``referer_spam_domains``.
        3. Reject the request with a 502 if ``self.robots_precheck`` refuses
           the URI.
        4. Otherwise send a 200 response, run the HttpResource test, and when
           it completes finish the formatter output and pickle the resource
           (gzip-compressed) into the reserved file.

        ``save_dir``, ``lang``, ``charset``, ``error_template`` and
        ``referer_spam_domains`` are names defined outside this method.
        """
        test_id = None
        path = None
        if save_dir and os.path.exists(save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix="", dir=save_dir)
            except (OSError, IOError):
                # Don't try to store it.
                pass
            else:
                # mkstemp hands back an open OS-level descriptor; only the
                # file name is needed here, so close it to avoid leaking one
                # descriptor per request.
                os.close(fd)
                test_id = os.path.split(path)[1]

        formatter = find_formatter(self.format, "html", self.descend)(
            self.base_uri,
            self.test_uri,
            self.req_hdrs,
            lang,
            self.output,
            allow_save=test_id,
            is_saved=False,
            test_id=test_id,
            descend=self.descend,
        )
        # Every response below advertises the same media type and charset.
        content_type = "%s; charset=%s" % (formatter.media_type, charset)

        # referer limiting
        referers = [
            value for hdr, value in self.req_hdrs if hdr.lower() == "referer"
        ]
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            self.response_start(
                "403",
                "Forbidden",
                [
                    ("Content-Type", content_type),
                    ("Cache-Control", "max-age=360, must-revalidate"),
                ],
            )
            formatter.start_output()
            self.output(error_template % referer_error)
            self.response_done([])
            return

        # robots.txt check
        if not self.robots_precheck(self.test_uri):
            self.response_start(
                "502",
                "Gateway Error",
                [
                    ("Content-Type", content_type),
                    ("Cache-Control", "max-age=60, must-revalidate"),
                ],
            )
            formatter.start_output()
            self.output(error_template % "Forbidden by robots.txt.")
            self.response_done([])
            return

        self.response_start(
            "200",
            "OK",
            [
                ("Content-Type", content_type),
                ("Cache-Control", "max-age=60, must-revalidate"),
            ],
        )

        ired = HttpResource(
            self.test_uri,
            req_hdrs=self.req_hdrs,
            status_cb=formatter.status,
            body_procs=[formatter.feed],
            descend=self.descend,
        )
        formatter.start_output()

        def done():
            """Finish the output once the resource test has completed."""
            if self.check_type:
                # TODO: catch errors
                state = ired.subreqs.get(self.check_type, None)
            else:
                state = ired
            formatter.set_state(state)
            formatter.finish_output()
            self.response_done([])
            if test_id:
                try:
                    # The context manager closes the gzip file even when
                    # pickling fails part-way through.
                    with gzip.open(path, "w") as tmp_file:
                        pickle.dump(ired, tmp_file)
                except (IOError, zlib.error, pickle.PickleError):
                    pass  # we don't cry if we can't store it.

        ired.run(done)
Example #14
0
    def run_test(self) -> None:
        """Prepare a test of self.test_uri and hand it to continue_test.

        Builds the resource and its formatter, schedules the runtime timeout,
        then applies the gates in order: Referer checks, rate limiting, and
        (when both hCaptcha settings are present) a captcha challenge. Any
        gate that fails answers through error_response and stops the run.
        """
        self.test_id = init_save_file(self)

        resource = HttpResource(self.config, descend=self.descend)
        resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)

        formatter_class = find_formatter(self.format, "html", self.descend)
        formatter = formatter_class(
            self.config,
            resource,
            self.output,
            allow_save=self.test_id,
            is_saved=False,
            test_id=self.test_id,
            descend=self.descend,
        )

        # Pre-bind the common arguments so later steps only supply the rest.
        proceed = partial(self.continue_test, resource, formatter)
        fail = partial(self.error_response, formatter)

        max_runtime = int(self.config["max_runtime"])
        self.timeout = thor.schedule(
            max_runtime, self.timeoutError, resource.show_task_map
        )

        # referer limiting
        referers = [
            value for name, value in self.req_hdrs if name.lower() == "referer"
        ]
        configured = self.config.get("referer_spam_domains", fallback="")
        blocked_hosts = [entry.strip() for entry in configured.split()]

        referer_error = (
            "Multiple referers not allowed." if len(referers) > 1 else None
        )
        # A blocked first referer takes precedence over the message above.
        if (blocked_hosts and referers
                and urlsplit(referers[0]).hostname in blocked_hosts):
            referer_error = "Referer not allowed."

        if referer_error is not None:
            fail(b"403", b"Forbidden", referer_error)
            return

        # enforce client limits
        try:
            ratelimiter.process(self, fail)
        except ValueError:
            return  # over limit, don't continue.

        # hCaptcha is only used when both the site key and secret are set.
        if not (self.config.get("hcaptcha_sitekey", "")
                and self.config.get("hcaptcha_secret", "")):
            proceed()
        else:
            challenge = CaptchaHandler(
                self,
                self.get_client_id(),
                proceed,
                fail,
            )
            challenge.run()