def process(self):
    """Dispatch one incoming request: serve it locally or proxy it to an onion.

    The method is written as a generator that finishes through
    ``defer.returnValue`` (it is presumably wrapped by
    ``defer.inlineCallbacks``; the decorator is not part of this block).

    Stages, in order:

    * plain-HTTP requests are redirected to HTTPS unless
      ``config.transport`` is ``'HTTP'``;
    * when ``config.blockcrawl`` is set, user agents matching an entry of
      ``blocked_ua_list`` receive a 403;
    * gzip support is recorded on ``self.obj`` from ``Accept-Encoding``;
    * local resources (``dev/null``, ``stats/yesterday``, ``notification``
      and the entries of ``antanistaticmap``) are answered directly; any
      other local path, or any failure while serving, yields a 404;
    * remote resources are checked against ``access_list``, clients listed
      in ``tor_exits_list`` are redirected to the ``.onion`` address,
      hotlinked images are refused, and the request is finally forwarded
      through ``Agent``.

    The generator's result is ``None`` after a redirect, the value of
    ``self.contentFinish`` for local content, or ``NOT_DONE_YET`` when an
    error page was sent or the proxied request is in flight.
    """
    request = Storage()
    request.headers = self.requestHeaders
    request.host = self.getRequestHostname()
    request.uri = self.uri

    content_length = self.getHeader(b'content-length')
    transfer_encoding = self.getHeader(b'transfer-encoding')

    # Turn the URI into a static-map key: a trailing slash means index.html,
    # then the /antanistaticmap/ prefix and any leading slash are dropped.
    staticpath = re.sub('/$', '/index.html', request.uri)
    staticpath = re.sub('^(/antanistaticmap/)?', '', staticpath)
    staticpath = re.sub('^/', '', staticpath)

    resource_is_local = (
        (config.mode != "TRANSLATION" and
         (request.host == config.basehost or
          request.host == 'www.' + config.basehost)) or
        isIPAddress(request.host) or
        isIPv6Address(request.host) or
        (config.overriderobotstxt and request.uri == '/robots.txt') or
        request.uri.startswith('/antanistaticmap/'))

    # Pick the body producer to forward; the matching framing header is
    # removed from the headers of the request being relayed.
    if content_length is not None:
        self.bodyProducer.length = int(content_length)
        producer = self.bodyProducer
        request.headers.removeHeader(b'content-length')
    elif transfer_encoding is not None:
        producer = self.bodyProducer
        request.headers.removeHeader(b'transfer-encoding')
    else:
        producer = None

    if config.mirror is not None:
        # Never advertise this node itself as a mirror.
        if config.basehost in config.mirror:
            config.mirror.remove(config.basehost)
        if len(config.mirror) > 1:
            self.var['mirror'] = choice(config.mirror)
        elif len(config.mirror) == 1:
            self.var['mirror'] = config.mirror[0]

    # we serve contents only over https
    if not self.isSecure() and (config.transport != 'HTTP'):
        self.redirect("https://" + request.host + request.uri)
        self.finish()
        defer.returnValue(None)

    # 0: Request admission control stage
    # We deny EVERY request from user agents recognized by pattern matching,
    # whether the request is valid or not, local or not.
    user_agent = request.headers.getRawHeaders(b'user-agent')
    if config.blockcrawl and user_agent is not None:
        agent_name = user_agent[0].lower()
        for ua in blocked_ua_list:
            if re.match(ua, agent_name):
                self.sendError(403, "error_blocked_ua.tpl")
                defer.returnValue(NOT_DONE_YET)

    # 1: Client capability assessment stage
    accept_encoding = request.headers.getRawHeaders(b'accept-encoding')
    if accept_encoding is not None and re.search('gzip', accept_encoding[0]):
        self.obj.client_supports_gzip = True

    # 2: Content delivery stage
    # Some checks apply only to remote requests; local content (css, js and
    # png, which are used by the error pages) is always served.
    if resource_is_local:
        # the requested resource is local, we deliver it directly
        try:
            if staticpath == "dev/null":
                content = "A" * random.randint(20, 1024)
                self.setHeader(b'content-type', 'text/plain')
                defer.returnValue(self.contentFinish(content))

            elif staticpath == "stats/yesterday":
                self.setHeader(b'content-type', 'application/json')
                content = yield rpc("get_yesterday_stats")
                defer.returnValue(self.contentFinish(content))

            elif staticpath == "notification":
                # Read the whole request body, then parse it as
                # x-www-form-urlencoded POST data.
                content_receiver = BodyReceiver(defer.Deferred())
                self.bodyProducer.startProducing(content_receiver)
                yield self.bodyProducer.finished
                content = ''.join(content_receiver._data)

                args = {}

                ctype = self.requestHeaders.getRawHeaders(b'content-type')
                if ctype is not None:
                    ctype = ctype[0]

                if self.method == b"POST" and ctype:
                    key, pdict = parse_header(ctype)
                    if key == b'application/x-www-form-urlencoded':
                        args.update(parse_qs(content, 1))

                if 'by' in args and 'url' in args and 'comment' in args:
                    tmp = [
                        "From: Tor2web Node %s.%s <%s>\n" % (config.nodename, config.basehost, config.smtpmail),
                        "To: %s\n" % config.smtpmailto_notifications,
                        "Subject: Tor2web Node (IPv4 %s, IPv6 %s): notification for %s\n" % (config.listen_ipv4, config.listen_ipv6, args['url'][0]),
                        "Content-Type: text/plain; charset=ISO-8859-1\n",
                        "Content-Transfer-Encoding: 8bit\n\n",
                        "BY: %s\n" % (args['by'][0]),
                        "URL: %s\n" % (args['url'][0]),
                        "COMMENT: %s\n" % (args['comment'][0]),
                    ]
                    message = StringIO(''.join(tmp))

                    # Best effort: a mail failure must not fail the request.
                    try:
                        sendmail(config.smtpuser,
                                 config.smtppass,
                                 config.smtpmail,
                                 config.smtpmailto_notifications,
                                 message,
                                 config.smtpdomain,
                                 config.smtpport)
                    except Exception:
                        pass

                self.setHeader(b'content-type', 'text/plain')
                defer.returnValue(self.contentFinish(''))

            else:
                # An unknown path raises KeyError here and ends up as 404.
                resource = antanistaticmap[staticpath]
                if isinstance(resource, str):
                    filename, ext = os.path.splitext(staticpath)
                    self.setHeader(b'content-type', mimetypes.types_map[ext])
                    defer.returnValue(self.contentFinish(resource))

                elif isinstance(resource, PageTemplate):
                    defer.returnValue(flattenString(self, resource).addCallback(self.contentFinish))

        except Exception:
            # Any failure while serving local content is reported as 404.
            # defer.returnValue signals completion with an exception that
            # does not derive from Exception, so it is not swallowed here.
            pass

        self.sendError(404)
        defer.returnValue(NOT_DONE_YET)

    else:
        self.obj.uri = request.uri

        if not request.host:
            self.sendError(406, 'error_invalid_hostname.tpl')
            defer.returnValue(NOT_DONE_YET)

        if config.mode == "TRANSLATION":
            self.obj.onion = config.onion
        else:
            # The left-most host label names the hidden service.
            self.obj.onion = request.host.split(".")[0] + ".onion"
            rpc_log("detected <onion_url>.tor2web Hostname: %s" % self.obj.onion)
            if not verify_onion(self.obj.onion):
                self.sendError(406, 'error_invalid_hostname.tpl')
                defer.returnValue(NOT_DONE_YET)

            if config.mode == "ACCESSLIST":
                # Compare the hex digest (as the BLACKLIST checks do); the
                # bare hash object can never be a member of the list.
                if hashlib.md5(self.obj.onion).hexdigest() not in access_list:
                    self.sendError(403, 'error_hs_completely_blocked.tpl')
                    defer.returnValue(NOT_DONE_YET)

            elif config.mode == "BLACKLIST":
                if hashlib.md5(self.obj.onion).hexdigest() in access_list:
                    self.sendError(403, 'error_hs_completely_blocked.tpl')
                    defer.returnValue(NOT_DONE_YET)

                if hashlib.md5(self.obj.onion + self.obj.uri).hexdigest() in access_list:
                    self.sendError(403, 'error_hs_specific_page_blocked.tpl')
                    defer.returnValue(NOT_DONE_YET)

        # If the client is already using Tor it is better to redirect it
        # straight to the .onion address.
        if self.getClientIP() in tor_exits_list:
            self.redirect("http://" + self.obj.onion + request.uri)

            try:
                self.finish()
            except Exception:
                pass

            defer.returnValue(None)

        # Avoid image hotlinking
        if request.uri.lower().endswith(('gif', 'jpg', 'png')):
            referer = request.headers.getRawHeaders(b'referer')
            if referer is not None and config.basehost not in referer[0].lower():
                self.sendError(403)
                defer.returnValue(NOT_DONE_YET)

        # the requested resource is remote, we act as proxy
        self.process_request(request)

        parsed = urlparse(self.obj.address)

        self.var['address'] = self.obj.address
        self.var['onion'] = self.obj.onion.replace(".onion", "")
        self.var['path'] = parsed[2]
        if parsed[3] is not None and parsed[3] != '':
            self.var['path'] += '?' + parsed[3]

        agent = Agent(reactor, sockhost=config.sockshost, sockport=config.socksport, pool=self.pool)

        if config.dummyproxy is None:
            proxy_url = 's' + self.obj.address
        else:
            proxy_url = config.dummyproxy + parsed[2] + '?' + parsed[3]

        self.proxy_d = agent.request(self.method,
                                     proxy_url,
                                     self.obj.headers,
                                     bodyProducer=producer)

        self.proxy_d.addCallback(self.cbResponse)
        self.proxy_d.addErrback(self.handleError)

        defer.returnValue(NOT_DONE_YET)
def process(self):
    """Dispatch one incoming request: serve it locally or proxy it to an onion.

    The method is written as a generator that finishes through
    ``defer.returnValue`` or a bare ``return`` (it is presumably wrapped by
    ``defer.inlineCallbacks``; the decorator is not part of this block).

    Stages, in order:

    * plain-HTTP requests are redirected to HTTPS;
    * when ``config.blockcrawl`` is set, user agents matching an entry of
      ``t2w.blocked_ua`` receive a 403;
    * gzip support is recorded on ``self.obj`` from ``Accept-Encoding``;
    * local resources (``notification`` and the entries of
      ``antanistaticmap``) are answered directly; any other local path, or
      any failure while serving, yields a 404;
    * remote resources are checked against ``t2w.accesslist``, clients
      listed in ``t2w.TorExitNodes`` are redirected to the ``.onion``
      address, hotlinked images are refused, and the request is finally
      forwarded through ``Agent``.

    The generator's result is ``None`` after a redirect, the value of
    ``self.contentFinish`` for local content, or ``NOT_DONE_YET`` when an
    error page was sent or the proxied request is in flight.
    """
    request = Storage()
    request.headers = self.requestHeaders
    request.host = self.getRequestHostname()
    request.uri = self.uri

    content_length = self.getHeader(b'content-length')
    transfer_encoding = self.getHeader(b'transfer-encoding')

    # Turn the URI into a static-map key: a trailing slash means index.html,
    # then the /antanistaticmap/ prefix and any leading slash are dropped.
    staticpath = re.sub(r'\/$', '/index.html', request.uri)
    staticpath = re.sub('^(/antanistaticmap/)?', '', staticpath)
    staticpath = re.sub('^/', '', staticpath)

    resource_is_local = (
        isIPAddress(request.host) or
        isIPv6Address(request.host) or
        request.uri == '/robots.txt' or
        request.uri.startswith('/antanistaticmap/'))

    # Pick the body producer to forward; the matching framing header is
    # removed from the headers of the request being relayed.
    if content_length is not None:
        self.bodyProducer.length = int(content_length)
        producer = self.bodyProducer
        request.headers.removeHeader(b'content-length')
    elif transfer_encoding is not None:
        producer = self.bodyProducer
        request.headers.removeHeader(b'transfer-encoding')
    else:
        producer = None

    if config.mirror is not None:
        # Never advertise this node itself as a mirror.
        if config.basehost in config.mirror:
            config.mirror.remove(config.basehost)
        # The list may be empty now; choice() would raise IndexError on it.
        if config.mirror:
            self.var['mirror'] = choice(config.mirror)

    # we serve contents only over https
    if not self.isSecure():
        self.redirect("https://" + request.host + request.uri)
        self.finish()
        return

    # 0: Request admission control stage
    # We deny EVERY request from user agents recognized by pattern matching,
    # whether the request is valid or not, local or not.
    user_agent = request.headers.getRawHeaders(b'user-agent')
    if config.blockcrawl and user_agent is not None:
        check = user_agent[0].lower()
        for ua in t2w.blocked_ua:
            if re.match(ua, check):
                self.sendError(403, "error_blocked_ua.tpl")
                defer.returnValue(NOT_DONE_YET)

    # 1: Client capability assessment stage
    accept_encoding = request.headers.getRawHeaders(b'accept-encoding')
    if accept_encoding is not None and re.search('gzip', accept_encoding[0]):
        self.obj.client_supports_gzip = True

    # 2: Content delivery stage
    # Some checks apply only to remote requests; local content (css, js and
    # png, which are used by the error pages) is always served.
    if resource_is_local:
        # the requested resource is local, we deliver it directly
        try:
            if staticpath == "notification":
                # Read the whole request body, then parse it as
                # x-www-form-urlencoded POST data.
                content_receiver = BodyReceiver(defer.Deferred())
                self.bodyProducer.startProducing(content_receiver)
                yield self.bodyProducer.finished
                content = ''.join(content_receiver._data)

                args = {}

                ctype = self.requestHeaders.getRawHeaders(b'content-type')
                if ctype is not None:
                    ctype = ctype[0]

                if self.method == b"POST" and ctype:
                    key, pdict = parse_header(ctype)
                    if key == b'application/x-www-form-urlencoded':
                        args.update(parse_qs(content, 1))

                if 'by' in args and 'url' in args and 'comment' in args:
                    tmp = [
                        "From: Tor2web Node %s.%s <%s>\n" % (config.nodename, config.basehost, config.smtpmail),
                        "To: %s\n" % (config.smtpmailto_notifications),
                        "Subject: Tor2web Node (IPv4 %s, IPv6 %s): notification for %s\n" % (config.listen_ipv4, config.listen_ipv6, args['url'][0]),
                        "Content-Type: text/plain; charset=ISO-8859-1\n",
                        "Content-Transfer-Encoding: 8bit\n\n",
                        "BY: %s\n" % (args['by'][0]),
                        "URL: %s\n" % (args['url'][0]),
                        "COMMENT: %s\n" % (args['comment'][0]),
                    ]
                    message = StringIO(''.join(tmp))

                    # Best effort: a mail failure must not fail the request.
                    try:
                        sendmail(config.smtpuser,
                                 config.smtppass,
                                 config.smtpmail,
                                 config.smtpmailto_notifications,
                                 message,
                                 config.smtpdomain,
                                 config.smtpport)
                    except Exception:
                        pass

                # Finish the notification request with an empty body rather
                # than falling through to the 404 below.
                self.setHeader(b'content-type', 'text/plain')
                defer.returnValue(self.contentFinish(''))

            else:
                # An unknown path raises KeyError here and ends up as 404.
                resource = antanistaticmap[staticpath]
                if isinstance(resource, str):
                    filename, ext = os.path.splitext(staticpath)
                    self.setHeader(b'content-type', mimetypes.types_map[ext])
                    defer.returnValue(self.contentFinish(resource))

                elif isinstance(resource, PageTemplate):
                    defer.returnValue(flattenString(self, resource).addCallback(self.contentFinish))

        except Exception:
            # Any failure while serving local content is reported as 404.
            # This must not be a bare "except:": defer.returnValue signals
            # completion with an exception that does not derive from
            # Exception, and swallowing it would send a 404 after the
            # content was already delivered.
            pass

        self.sendError(404)
        defer.returnValue(NOT_DONE_YET)

    else:
        self.obj.uri = request.uri

        if not request.host:
            self.sendError(406, 'error_invalid_hostname.tpl')
            defer.returnValue(NOT_DONE_YET)

        if config.mode == "TRANSLATION":
            self.obj.onion = config.onion
        else:
            # The left-most host label names the hidden service.
            self.obj.onion = request.host.split(".")[0] + ".onion"
            log.msg("detected <onion_url>.tor2web Hostname: %s" % self.obj.onion)
            if not verify_onion(self.obj.onion):
                self.sendError(406, 'error_invalid_hostname.tpl')
                defer.returnValue(NOT_DONE_YET)

            if config.mode == "ACCESSLIST":
                if self.obj.onion not in t2w.accesslist:
                    self.sendError(403, 'error_hs_completely_blocked.tpl')
                    defer.returnValue(NOT_DONE_YET)

            elif config.mode == "BLACKLIST":
                if hashlib.md5(self.obj.onion).hexdigest() in t2w.accesslist:
                    self.sendError(403, 'error_hs_completely_blocked.tpl')
                    defer.returnValue(NOT_DONE_YET)

                if hashlib.md5(self.obj.onion + self.obj.uri).hexdigest() in t2w.accesslist:
                    self.sendError(403, 'error_hs_specific_page_blocked.tpl')
                    defer.returnValue(NOT_DONE_YET)

        # If the client is already using Tor it is better to redirect it
        # straight to the .onion address.
        if self.getClientIP() in t2w.TorExitNodes:
            self.redirect("http://" + self.obj.onion + request.uri)

            try:
                self.finish()
            except Exception:
                pass

            return

        # Avoid image hotlinking
        if request.uri.lower().endswith(('gif', 'jpg', 'png')):
            referer = request.headers.getRawHeaders(b'referer')
            if referer is not None and config.basehost not in referer[0].lower():
                self.sendError(403)
                defer.returnValue(NOT_DONE_YET)

        # the requested resource is remote, we act as proxy
        t2w.process_request(self.obj, request)

        parsed = urlparse(self.obj.address)

        self.var['address'] = self.obj.address
        self.var['onion'] = self.obj.onion.replace(".onion", "")
        # Append the '?'-separated component only when it is non-empty, so a
        # plain path does not gain a dangling '?'.
        self.var['path'] = parsed[2]
        if parsed[3]:
            self.var['path'] += '?' + parsed[3]

        agent = Agent(reactor, sockhost=config.sockshost, sockport=config.socksport, pool=self.pool)

        self.proxy_d = agent.request(self.method,
                                     's' + self.obj.address,
                                     self.obj.headers,
                                     bodyProducer=producer)

        self.proxy_d.addCallback(self.cbResponse)
        self.proxy_d.addErrback(self.handleError)

        defer.returnValue(NOT_DONE_YET)
def process(self):
    """Dispatch one incoming request: serve it locally or proxy it to an onion.

    Stages, in order:

    * plain-HTTP requests are redirected to HTTPS;
    * ``/robots.txt`` is answered with a disallow-all policy when
      ``config.blockcrawl`` is set;
    * user agents matching an entry of ``t2w.blocked_ua`` receive a 403;
    * remote requests are checked against ``self.accesslist``, clients
      listed in ``t2w.TorExitNodes`` are redirected to the ``.onion``
      address and hotlinked images are refused;
    * local resources (entries of ``antanistaticmap`` and ``notification``)
      are delivered directly, everything else is forwarded through
      ``Agent``.

    Returns ``None`` after a redirect or the robots.txt reply, the result
    of ``self.sendError`` / ``self.contentFinish`` (or the deferred from
    ``flattenString``) for locally answered requests, and ``NOT_DONE_YET``
    once the proxied request has been started.
    """
    content = ""

    request = Storage()
    request.headers = self.requestHeaders
    request.host = self.getRequestHostname()
    request.uri = self.uri

    # An empty mirror list would make choice() raise IndexError.
    if config.mirror:
        self.var['mirror'] = choice(config.mirror)

    # we serve contents only over https
    if not self.isSecure():
        self.redirect("https://" + request.host + request.uri)
        self.finish()
        return

    # 0: Request admission control stage
    # First, instruct spiders that honour robots.txt that we do not want to
    # be indexed.
    if request.uri == "/robots.txt" and config.blockcrawl:
        self.write("User-Agent: *\n")
        self.write("Disallow: /\n")
        self.finish()
        return

    # Second, deny EVERY request from user agents recognized by pattern
    # matching, whether the request is valid or not, local or not.
    user_agent = request.headers.getRawHeaders(b'user-agent')
    if user_agent is not None:
        check = user_agent[0].lower()
        for ua in t2w.blocked_ua:
            if re.match(ua, check):
                return self.sendError(403, "error_blocked_ua.tpl")

    # Some checks apply only to remote requests; local content (css, js and
    # png, which are used by the error pages) is always served.
    resource_is_local = verify_resource_is_local(request.host, request.uri, self.staticmap)

    if not resource_is_local:
        if not request.host:
            return self.sendError(406, 'error_invalid_hostname.tpl')

        # Set before the access checks: the per-page blocklist hash below
        # is computed from self.obj.uri.
        self.obj.uri = request.uri

        if config.mode == "TRANSLATION":
            self.obj.onion = config.onion
        else:
            # The left-most host label names the hidden service.
            self.obj.onion = request.host.split(".")[0] + ".onion"
            log.msg("detected <onion_url>.tor2web Hostname: %s" % self.obj.onion)
            if not verify_onion(self.obj.onion):
                return self.sendError(406, 'error_invalid_hostname.tpl')

            if config.mode == "ACCESSLIST":
                if self.obj.onion not in self.accesslist:
                    return self.sendError(403, 'error_hs_completely_blocked.tpl')

            elif config.mode == "BLOCKLIST":
                if hashlib.md5(self.obj.onion).hexdigest() in self.accesslist:
                    return self.sendError(403, 'error_hs_completely_blocked.tpl')

                if hashlib.md5(self.obj.onion + self.obj.uri).hexdigest() in self.accesslist:
                    return self.sendError(403, 'error_hs_specific_page_blocked.tpl')

        # If the client is already using Tor it is better to redirect it
        # straight to the .onion address.
        if self.getClientIP() in t2w.TorExitNodes:
            self.redirect("http://" + self.obj.onion + request.uri)
            self.finish()
            return

        # Avoid image hotlinking
        if request.uri.lower().endswith(('gif', 'jpg', 'png')):
            referer = request.headers.getRawHeaders(b'referer')
            if referer is not None and config.basehost not in referer[0].lower():
                return self.sendError(403)

    self.setHeader(b'strict-transport-security', b'max-age=31536000')

    # 1: Client capability assessment stage
    accept_encoding = request.headers.getRawHeaders(b'accept-encoding')
    if accept_encoding is not None and re.search('gzip', accept_encoding[0]):
        self.obj.client_supports_gzip = True

    # 2: Content delivery stage
    if resource_is_local:
        # the requested resource is local, we deliver it directly
        try:
            # Turn the URI into a static-map key: a trailing slash means
            # index.html, then the static-map prefix and any leading slash
            # are dropped.
            staticpath = re.sub(r'\/$', '/index.html', request.uri)
            staticpath = re.sub('^(' + self.staticmap + ')?', '', staticpath)
            staticpath = re.sub('^/', '', staticpath)

            if staticpath in antanistaticmap:
                resource = antanistaticmap[staticpath]
                if isinstance(resource, str):
                    filename, ext = os.path.splitext(staticpath)
                    self.setHeader(b'content-type', mimetypes.types_map[ext])
                    content = resource
                elif isinstance(resource, PageTemplate):
                    return flattenString(self, resource).addCallback(self.contentFinish)

            elif staticpath == "notification":
                if 'by' in self.args and 'url' in self.args and 'comment' in self.args:
                    tmp = [
                        "From: Tor2web Node %s.%s <%s>\n" % (config.nodename, config.basehost, config.smtpmail),
                        "To: %s\n" % (config.smtpmailto_notifications),
                        "Subject: Tor2web Node (IPv4 %s, IPv6 %s): notification for %s\n" % (config.listen_ipv4, config.listen_ipv6, self.args['url'][0]),
                        "Content-Type: text/plain; charset=ISO-8859-1\n",
                        "Content-Transfer-Encoding: 8bit\n\n",
                        "BY: %s\n" % (self.args['by'][0]),
                        "URL: %s\n" % (self.args['url'][0]),
                        "COMMENT: %s\n" % (self.args['comment'][0]),
                    ]
                    message = StringIO(''.join(tmp))
                    sendmail(config.smtpuser,
                             config.smtppass,
                             config.smtpmail,
                             config.smtpmailto_notifications,
                             message,
                             config.smtpdomain,
                             config.smtpport)

            else:
                return self.sendError(404)

        except Exception:
            # Any failure while preparing local content is reported as 404.
            return self.sendError(404)

        return self.contentFinish(content)

    # the requested resource is remote, we act as proxy
    t2w.process_request(self.obj, request)

    # Sanity-check the target address; the unpacked values are not used
    # afterwards, only the exceptions raised by a malformed address matter.
    try:
        parsed = urlparse(self.obj.address)
        protocol = parsed[0]
        host = parsed[1]
        if ':' in host:
            host, port = host.split(":")
            port = int(port)
        else:
            port = self.ports[protocol]
    except Exception:
        return self.sendError(400, "error_invalid_hostname.tpl")

    dest = client._parse(self.obj.address)  # scheme, host, port, path

    self.var['onion'] = self.obj.onion
    self.var['path'] = dest[3]

    # The header value is text: convert it before comparing it with 0 and
    # before handing it over as the body length.
    bodyProducer = None
    content_length = self.getHeader(b'content-length')
    if content_length is not None:
        try:
            body_length = int(content_length)
        except ValueError:
            return self.sendError(400)
        if body_length >= 0:
            bodyProducer = BodyProducer(self.content, body_length)
            request.headers.removeHeader(b'content-length')

    agent = Agent(reactor, sockhost=config.sockshost, sockport=config.socksport, pool=self.pool)
    d = agent.request(self.method,
                      'shttp://' + dest[1] + dest[3],
                      self.obj.headers,
                      bodyProducer=bodyProducer)
    d.addCallback(self.cbResponse)
    d.addErrback(self.handleError)

    return NOT_DONE_YET