예제 #1
0
파일: webui.py 프로젝트: optionalg/redbot
    def robots_precheck(self, iri: str) -> bool:
        """
        Consult robots.txt for the given IRI and report whether the request
        is permitted.

        The check is made with ``sync=True`` and, per the original contract,
        does not itself fetch robots.txt.

        Returns True (allow) when the IRI cannot be converted or checked
        because of a UnicodeError or ValueError.
        """
        fetcher = RobotFetcher()
        try:
            uri = HttpRequest.iri_to_uri(iri)
            allowed = fetcher.check_robots(uri, sync=True)
        except (UnicodeError, ValueError):
            # Fail open: an IRI we cannot process is not blocked at this stage.
            allowed = True
        return allowed
예제 #2
0
파일: webui.py 프로젝트: jugglinmike/redbot
    def robots_precheck(self, iri: str) -> bool:
        """
        Report whether robots.txt permits a request for the given IRI.

        The lookup is synchronous (``sync=True``); per the original contract
        it does not fetch robots.txt.

        A UnicodeError or ValueError raised while converting or checking the
        IRI is treated as "allowed" and True is returned.
        """
        checker = RobotFetcher()
        try:
            verdict = checker.check_robots(
                HttpRequest.iri_to_uri(iri), sync=True)
        except (UnicodeError, ValueError):
            # Unprocessable IRIs are let through rather than rejected here.
            return True
        else:
            return verdict
예제 #3
0
파일: webui.py 프로젝트: mnot/redbot
    def run_test(self) -> None:
        """Test a URI.

        Creates the top-level resource and the output formatter, then runs a
        sequence of gates. Each gate that rejects the request sends an error
        response and returns without starting the test:

        1. Referer limiting (multiple Referer headers, or a spam domain).
        2. "Human" override: a ``robot_time`` / ``robot_hmac`` pair previously
           issued by this method; if valid and unexpired the test continues
           immediately, skipping the remaining gates.
        3. Per-client and per-origin test limits (when configured).
        4. robots.txt check; if disallowed, a short-lived signed link is
           offered so a human can override.
        """
        # Function-scope import: only `urlsplit` is known to be imported at
        # file level, and `quote` is needed to build the override link safely.
        from urllib.parse import quote

        # try to initialise stored test results
        if self.config.get('save_dir', "") and os.path.exists(self.config['save_dir']):
            try:
                fd, self.save_path = tempfile.mkstemp(prefix='', dir=self.config['save_dir'])
                # mkstemp hands back an open OS-level descriptor; only the
                # path is kept, so close it instead of leaking one per request.
                os.close(fd)
                self.test_id = os.path.split(self.save_path)[1]
            except (OSError, IOError):
                # Don't try to store it.
                self.test_id = None # should already be None, but make sure

        top_resource = HttpResource(self.config, descend=self.descend)
        self.timeout = thor.schedule(int(self.config['max_runtime']), self.timeoutError,
                                     top_resource.show_task_map)
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, 'html', self.descend)(
            self.config, self.output, allow_save=self.test_id, is_saved=False,
            test_id=self.test_id, descend=self.descend)

        def reject(status: bytes, phrase: bytes, cache_control: bytes, message: str) -> None:
            """Send a complete error response carrying `message`."""
            self.response_start(status, phrase, [
                (b"Content-Type", formatter.content_type()),
                (b"Cache-Control", cache_control)])
            formatter.start_output()
            formatter.error_output(message)
            self.response_done([])

        def robot_signature(timestamp: str) -> str:
            """Return the hex HMAC of `timestamp` under the robot secret."""
            # digestmod is mandatory on Python 3.8+; name it explicitly so
            # issuing and verifying always agree.
            return hmac.new(self._robot_secret, bytes(timestamp, 'ascii'),
                            digestmod='sha256').hexdigest()

        # referer limiting
        referers = [value for hdr, value in self.req_hdrs if hdr.lower() == 'referer']
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in self.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            reject(b"403", b"Forbidden", b"max-age=360, must-revalidate", referer_error)
            return

        # robot human check
        if self.robot_time and self.robot_time.isdigit() and self.robot_hmac:
            try:
                # str.isdigit() also accepts non-ASCII digits, which make
                # int() or the ASCII encoding raise ValueError; compare_digest
                # raises TypeError on non-ASCII text. Both mean "invalid key".
                valid_till = int(self.robot_time)
                is_valid = hmac.compare_digest(
                    self.robot_hmac, robot_signature(self.robot_time))
            except (TypeError, ValueError):
                valid_till, is_valid = 0, False
            if is_valid and valid_till >= thor.time():
                self.continue_test(top_resource, formatter)
                return
            reject(b"403", b"Forbidden", b"max-age=60, must-revalidate", "Naughty.")
            self.error_log("Naughty robot key.")
            # The response is finished; do not fall through to the gates
            # below, which would try to start a second response.
            return

        # enforce client limits
        if self.config.getint('limit_client_tests', fallback=0):
            client_id = self.get_client_id()
            if client_id:
                if self._client_counts.get(client_id, 0) > \
                  self.config.getint('limit_client_tests'):
                    reject(b"429", b"Too Many Requests", b"max-age=60, must-revalidate",
                           "Your client is over limit. Please try later.")
                    self.error_log("client over limit: %s" % client_id.decode('idna'))
                    return
                self._client_counts[client_id] += 1

        # enforce origin limits
        if self.config.getint('limit_origin_tests', fallback=0):
            origin = url_to_origin(self.test_uri)
            if origin:
                if self._origin_counts.get(origin, 0) > \
                  self.config.getint('limit_origin_tests'):
                    reject(b"429", b"Too Many Requests", b"max-age=60, must-revalidate",
                           "Origin is over limit. Please try later.")
                    self.error_log("origin over limit: %s" % origin)
                    return
                self._origin_counts[origin] += 1

        # check robots.txt
        robot_fetcher = RobotFetcher(self.config)
        @thor.events.on(robot_fetcher)
        def robot(results: Tuple[str, bool]) -> None:
            """Handle the robots.txt verdict: run the test or offer an override."""
            _url, robot_ok = results
            if robot_ok:
                self.continue_test(top_resource, formatter)
            else:
                # Signed override link, valid for 60 seconds.
                valid_till = str(int(thor.time()) + 60)
                # Percent-encode the URI: it is untrusted input going into an
                # HTML attribute and a query string, so characters such as
                # ', & and # must not appear raw.
                quoted_uri = quote(self.test_uri, safe='')
                reject(b"403", b"Forbidden", b"no-cache",
                       "This site doesn't allow robots. If you are human, please <a href='?uri=%s&robot_time=%s&robot_hmac=%s'>click here</a>." % (quoted_uri, valid_till, robot_signature(valid_till)))

        robot_fetcher.check_robots(HttpRequest.iri_to_uri(self.test_uri))
예제 #4
0
파일: webui.py 프로젝트: kom0055/redbot
    def run_test(self) -> None:
        """Test a URI.

        Builds the top-level resource and output formatter, then applies a
        series of gates. A gate that rejects the request sends an error
        response and returns without starting the test:

        1. Referer limiting (more than one Referer header, or a spam domain).
        2. "Human" override: a ``robot_time`` / ``robot_hmac`` pair issued
           earlier by this method; when valid and unexpired the test continues
           at once and the remaining gates are skipped.
        3. Per-client and per-origin test limits (when configured).
        4. robots.txt check; when disallowed, a short-lived signed link is
           offered so that a human can override.
        """
        # Function-scope import: only `urlsplit` is known to be imported at
        # file level, and `quote` is needed to build the override link safely.
        from urllib.parse import quote

        # try to initialise stored test results
        save_dir = self.config.get("save_dir", "")
        if save_dir and os.path.exists(self.config["save_dir"]):
            try:
                fd, self.save_path = tempfile.mkstemp(
                    prefix="", dir=self.config["save_dir"])
                # mkstemp returns an open OS-level descriptor; only the path
                # is kept, so close it rather than leaking one per request.
                os.close(fd)
                self.test_id = os.path.split(self.save_path)[1]
            except (OSError, IOError):
                # Don't try to store it.
                self.test_id = None  # should already be None, but make sure

        top_resource = HttpResource(self.config, descend=self.descend)
        self.timeout = thor.schedule(
            int(self.config["max_runtime"]),
            self.timeoutError,
            top_resource.show_task_map,
        )
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, "html", self.descend)(
            self.config,
            self.output,
            allow_save=self.test_id,
            is_saved=False,
            test_id=self.test_id,
            descend=self.descend,
        )

        def reject(status: bytes, phrase: bytes, cache_control: bytes,
                   message: str) -> None:
            """Send a complete error response carrying `message`."""
            self.response_start(
                status,
                phrase,
                [
                    (b"Content-Type", formatter.content_type()),
                    (b"Cache-Control", cache_control),
                ],
            )
            formatter.start_output()
            formatter.error_output(message)
            self.response_done([])

        def robot_signature(timestamp: str) -> str:
            """Return the hex HMAC of `timestamp` under the robot secret."""
            # digestmod is mandatory on Python 3.8+; name it explicitly so
            # that issuing and verifying always agree.
            return hmac.new(
                self._robot_secret,
                bytes(timestamp, "ascii"),
                digestmod="sha256",
            ).hexdigest()

        # referer limiting
        referers = [
            value for hdr, value in self.req_hdrs if hdr.lower() == "referer"
        ]
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(
                referers[0]).hostname in self.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            reject(b"403", b"Forbidden", b"max-age=360, must-revalidate",
                   referer_error)
            return

        # robot human check
        if self.robot_time and self.robot_time.isdigit() and self.robot_hmac:
            try:
                # str.isdigit() also accepts non-ASCII digits, which make
                # int() or the ASCII encoding raise ValueError; compare_digest
                # raises TypeError on non-ASCII text. Both mean "invalid key".
                valid_till = int(self.robot_time)
                is_valid = hmac.compare_digest(
                    self.robot_hmac, robot_signature(self.robot_time))
            except (TypeError, ValueError):
                valid_till, is_valid = 0, False
            if is_valid and valid_till >= thor.time():
                self.continue_test(top_resource, formatter)
                return
            reject(b"403", b"Forbidden", b"max-age=60, must-revalidate",
                   "Naughty.")
            self.error_log("Naughty robot key.")
            # The response is finished; do not fall through to the gates
            # below, which would try to start a second response.
            return

        # enforce client limits
        if self.config.getint("limit_client_tests", fallback=0):
            client_id = self.get_client_id()
            if client_id:
                if self._client_counts.get(
                        client_id,
                        0) > self.config.getint("limit_client_tests"):
                    reject(b"429", b"Too Many Requests",
                           b"max-age=60, must-revalidate",
                           "Your client is over limit. Please try later.")
                    self.error_log("client over limit: %s" %
                                   client_id.decode("idna"))
                    return
                self._client_counts[client_id] += 1

        # enforce origin limits
        if self.config.getint("limit_origin_tests", fallback=0):
            origin = url_to_origin(self.test_uri)
            if origin:
                if self._origin_counts.get(
                        origin, 0) > self.config.getint("limit_origin_tests"):
                    reject(b"429", b"Too Many Requests",
                           b"max-age=60, must-revalidate",
                           "Origin is over limit. Please try later.")
                    self.error_log("origin over limit: %s" % origin)
                    return
                self._origin_counts[origin] += 1

        # check robots.txt
        robot_fetcher = RobotFetcher(self.config)

        @thor.events.on(robot_fetcher)
        def robot(results: Tuple[str, bool]) -> None:
            """Handle the robots.txt verdict: run the test or offer an override."""
            _url, robot_ok = results
            if robot_ok:
                self.continue_test(top_resource, formatter)
            else:
                # Signed override link, valid for 60 seconds.
                valid_till = str(int(thor.time()) + 60)
                # Percent-encode the URI: it is untrusted input going into an
                # HTML attribute and a query string, so characters such as
                # ', & and # must not appear raw.
                quoted_uri = quote(self.test_uri, safe="")
                reject(
                    b"403",
                    b"Forbidden",
                    b"no-cache",
                    "This site doesn't allow robots. If you are human, please <a href='?uri=%s&robot_time=%s&robot_hmac=%s'>click here</a>."
                    % (quoted_uri, valid_till, robot_signature(valid_till)),
                )

        robot_fetcher.check_robots(HttpRequest.iri_to_uri(self.test_uri))