Exemple #1
0
    def get(self, url):
        """Serve a proxied GET for the address carried in ``url``.

        The first ``-`` in ``url`` is turned back into ``://`` and the result
        is passed through ``transform_content.decodeUrl``. The first
        slash-free segment of the decoded address is handed to ``Client.go``
        as the base URL, together with the incoming request body and headers.

        Responds with 404 when no base segment can be extracted or when the
        client reports failure. Otherwise the upstream headers, content and
        status code are relayed through ``writeHeaders``, ``getContent`` and
        the response object.
        """
        url = url.replace('-', '://', 1)
        mirrored_url = transform_content.decodeUrl(url)

        # An empty or slash-only address yields no match; answer 404 instead
        # of crashing on ``None.group``. (The pattern's ``[^/]+`` guarantees a
        # non-empty capture whenever it does match.)
        base_match = re.search(r"/*([^/]+)", mirrored_url)
        if base_match is None:
            logging.debug("No base URL found in '%s'", mirrored_url)
            return self.error(404)
        base_url = base_match.group(1)
        logging.info("Proxy serving request for '%s'", mirrored_url)

        client = Client()
        success = client.go(base_url, mirrored_url, self.request.body, self.request.headers)
        if not success:
            return self.error(404)

        cres = client.response
        self.writeHeaders(cres)
        content = self.getContent(cres)
        if content:
            logging.debug("Len: %dB", len(content))
            self.response.write(content)
        # Mirror the upstream status code onto our own response.
        self.response.set_status(cres.status_code)
Exemple #2
0
    def get(self, raw_address):
        """Fetch the page addressed by ``raw_address`` and relay it.

        ``raw_address`` is decoded with ``transform_content.decodeUrl`` and
        its first slash-free segment is used as the base URL. If decoding
        left the address unchanged and the request carries a ``Referer``
        header, the base segment found in the referer's path is prepended
        when it differs from the one found in the address itself.

        The target ``HTTP_PREFIX + translated_address`` is fetched through
        ``Client.go``. Upstream headers not listed in ``IGNORE_HEADERS`` are
        copied to the response, with ``Location`` values rewritten; content
        whose type starts with an entry of ``TRANSFORMED_CONTENT_TYPES`` is
        passed through ``transform_content.TransformContent`` before being
        written. The upstream status code is relayed.

        Responds with 404 for ``favicon.ico`` / ``none``, when no base
        segment can be extracted, or when the client reports failure.
        """
        if raw_address in ('favicon.ico', 'none'):
            return self.error(404)

        translated_address = transform_content.decodeUrl(raw_address)

        # An empty or slash-only address yields no match; answer 404 instead
        # of crashing on ``None.group``. (The pattern's ``[^/]+`` guarantees a
        # non-empty capture whenever it does match.)
        base_match = re.search(r"/*([^/]+)", translated_address)
        if base_match is None:
            logging.debug("No base URL found in '%s'", translated_address)
            return self.error(404)
        base_url = base_match.group(1)

        # Check for a request without a base path; borrow the base path from
        # the referer if necessary.
        if raw_address == translated_address and 'Referer' in self.request.headers:
            referer = transform_content.decodeUrl(self.request.headers['Referer'])
            refmo = re.search(r"://[^/]+/+([^/]+)", referer)
            if refmo is not None:
                refbase = refmo.group(1)
                if base_url != refbase:
                    logging.debug("Basing address on: %s", refbase)
                    translated_address = refbase + '/' + translated_address
                    base_url = refbase

        # Log the user-agent and referrer, to see who is linking to us.
        # NOTE(review): self.request.referer may be absent for direct hits;
        # confirm decodeUrl tolerates that value.
        logging.debug('User-Agent = "%s", Referrer = "%s"',
                      self.request.user_agent,
                      transform_content.decodeUrl(self.request.referer))
        logging.debug('Base_url = "%s", url = "%s"', base_url, self.request.url)

        mirrored_url = HTTP_PREFIX + translated_address

        logging.info("Handling request for '%s'", mirrored_url)

        client = Client()
        success = client.go(base_url, mirrored_url, self.request.body, self.request.headers)
        if not success:
            return self.error(404)

        cres = client.response

        for key, value in cres.headers.iteritems():
            if key in IGNORE_HEADERS:
                continue
            if key.lower() == 'location':
                # Redirection: resolve a relative target against the mirrored
                # URL first.
                if not value.startswith(('http://', 'https://')):
                    logging.debug('Adjusting Location: %s', value)
                    value = urlparse.urljoin(mirrored_url, value)

                logging.debug("Location: %s", value)
                # Replace the scheme with a leading slash, encode the result,
                # and resolve it against the URI of the current request.
                value = urlparse.urljoin(self.request.uri, transform_content.encodeUrl(re.sub(r"^https?://", "/", value)))
                logging.info("Redirecting to '%s'", value)
            self.response.headers[key] = value

        content = cres.content
        if content:
            page_content_type = cres.headers.get("content-type", "")
            # Prefix match because there could be a 'charset=UTF-8' suffix in
            # the header.
            if any(page_content_type.startswith(content_type)
                   for content_type in TRANSFORMED_CONTENT_TYPES):
                content = transform_content.TransformContent(base_url, mirrored_url, content)
            logging.debug("Len: %dB", len(content))
            self.response.write(content)

        # Mirror the upstream status code onto our own response.
        self.response.set_status(cres.status_code)