def post(self):
    """Handle the input form by redirecting the user to a relative url.

    Reads the ``url`` request parameter. When it is empty, the
    ``main.html`` template is rendered into the response. Otherwise the
    value is unquoted, a leading ``HTTP_PREFIX`` is dropped if present,
    and the user is redirected to ``"/"`` plus the encoded address.

    Returns:
        The result of ``self.redirect(...)`` when a url was submitted,
        otherwise ``None``.
    """
    submitted = self.request.get("url")
    logging.info("Request url: %s", submitted)

    if not submitted:
        # Nothing was entered in the form: render the main page.
        page = jinja_env.get_template('main.html')
        self.response.write(page.render())
        return

    # Accept URLs that still have a leading 'http://'
    target = urllib.unquote(submitted)
    if target.startswith(HTTP_PREFIX):
        target = target[len(HTTP_PREFIX):]
    return self.redirect("/" + transform_content.encodeUrl(target))
def get(self, raw_address):
    """Fetch the remote page named by ``raw_address`` and relay it.

    The address is decoded with ``transform_content.decodeUrl`` and its
    first non-empty path segment is used as the base url. If decoding left
    the address unchanged and a ``Referer`` header is present, the first
    path segment of the decoded referer is prepended when it differs from
    the base url, so that requests issued without a base path are resolved
    against the referring page.

    The upstream response headers (except those in ``IGNORE_HEADERS``) are
    copied to the response, with ``Location`` rewritten to point back at
    this handler. Content whose content-type starts with one of
    ``TRANSFORMED_CONTENT_TYPES`` is passed through
    ``transform_content.TransformContent`` before being written.

    Args:
        raw_address: The requested address as captured from the route.

    Returns:
        The result of ``self.error(404)`` for ``favicon.ico``/``none``,
        for an address with no base url, or when the fetch fails;
        otherwise ``None``.
    """
    if raw_address in ('favicon.ico', 'none'):
        return self.error(404)

    translated_address = transform_content.decodeUrl(raw_address)

    # re.search returns None for an empty or slash-only address; reject it
    # explicitly instead of failing on ``.group`` (and instead of relying
    # on an ``assert``, which is stripped when Python runs with -O).
    base_match = re.search(r"/*([^/]+)", translated_address)
    if base_match is None:
        logging.debug("No base url in address: %s", raw_address)
        return self.error(404)
    base_url = base_match.group(1)

    # check for request without a base path, includes the referer base path
    # if necessary
    if raw_address == translated_address and 'Referer' in self.request.headers:
        referer = transform_content.decodeUrl(self.request.headers['Referer'])
        refmo = re.search(r"://[^/]+/+([^/]+)", referer)
        if refmo is not None:
            refbase = refmo.group(1)
            if base_url != refbase:
                logging.debug("Basing address on: %s", refbase)
                translated_address = refbase + '/' + translated_address
                base_url = refbase

    # Log the user-agent and referrer, to see who is linking to us.
    # NOTE(review): self.request.referer may be None when no Referer header
    # was sent -- confirm transform_content.decodeUrl accepts None.
    logging.debug('User-Agent = "%s", Referrer = "%s"',
                  self.request.user_agent,
                  transform_content.decodeUrl(self.request.referer))
    logging.debug('Base_url = "%s", url = "%s"', base_url, self.request.url)

    mirrored_url = HTTP_PREFIX + translated_address
    logging.info("Handling request for '%s'", mirrored_url)

    client = Client()
    success = client.go(base_url, mirrored_url,
                        self.request.body, self.request.headers)
    if not success:
        return self.error(404)

    cres = client.response
    for key, value in cres.headers.iteritems():
        if key in IGNORE_HEADERS:
            continue
        if key.lower() == 'location':
            # redirection: make the target absolute, then point it back at
            # this handler so the browser stays on the mirror.
            if not value.startswith(('http://', 'https://')):
                logging.debug('Adjusting Location: %s', value)
                value = urlparse.urljoin(mirrored_url, value)
            logging.debug("Location: %s", value)
            local_path = transform_content.encodeUrl(
                re.sub(r"^https?://", "/", value))
            value = urlparse.urljoin(self.request.uri, local_path)
            logging.info("Redirecting to '%s'", value)
        self.response.headers[key] = value

    content = cres.content
    if content:
        page_content_type = cres.headers.get("content-type", "")
        # startswith() because there could be a 'charset=UTF-8' in the header.
        if any(page_content_type.startswith(content_type)
               for content_type in TRANSFORMED_CONTENT_TYPES):
            content = transform_content.TransformContent(
                base_url, mirrored_url, content)
        logging.debug("Len: %dB", len(content))
        self.response.write(content)

    self.response.set_status(cres.status_code)