Esempio n. 1
0
    def __init__(self, channel, queued, reactor=reactor):
        """
        Initialise the per-request state.

        This replaces the initialisation done by proxy.Request and by the
        base http.Request with one that also prepares the attributes this
        class relies on (body producer, proxy deferred, template variables,
        gzip encoder/decoder slots and the connection pool).

        channel -- stored as self.channel; its transport is used directly
                   when the request is not queued.
        queued  -- when truthy, output is written to a fresh StringTransport
                   instead of the channel transport.
        reactor -- stored as self.reactor.
        """
        # Basic wiring
        self.reactor = reactor
        self.notifications = []
        self.channel = channel
        self.queued = queued

        # Header containers and cookie bookkeeping
        self.requestHeaders = Headers()
        self.received_cookies = {}
        self.responseHeaders = Headers()
        self.cookies = []  # cookies to be sent back to the client

        # State of the upstream (proxied) exchange
        self.bodyProducer = BodyProducer()
        self.proxy_d = None
        self.proxy_response = None

        self.stream = ''

        # When the banner is disabled we simply claim that it has
        # already been injected.
        self.header_injected = True if config['disable_banner'] else False

        # Queued requests write to a StringTransport; others write straight
        # to the channel transport.
        self.transport = StringTransport() if queued else self.channel.transport

        # Per-request objects and the variables exposed to templates
        self.obj = Tor2webObj()
        self.var = Storage()
        for key, value in (('version', VERSION),
                           ('basehost', config['basehost']),
                           ('errorcode', None)):
            self.var[key] = value

        self.html = False

        # Gzip helpers start out unset
        self.decoderGzip = None
        self.encoderGzip = None

        self.pool = pool
Esempio n. 2
0
    def process(self):
        """
        Dispatch an incoming request: serve it locally or proxy it to the
        hidden service.

        The body uses ``yield`` together with ``defer.returnValue``, so it is
        presumably wrapped by ``defer.inlineCallbacks`` through a decorator
        that is outside this view -- confirm before changing the control flow.

        Stages, in order:
          * requests that are not secure are redirected to https and finished;
          * when config.blockcrawl is set, user agents matching one of the
            t2w.blocked_ua patterns receive a 403;
          * gzip support is recorded from the Accept-Encoding header;
          * local resources (IP-addressed host, /robots.txt,
            /antanistaticmap/*) are served from antanistaticmap or handled
            as a "notification" form post;
          * everything else is checked against the configured mode and
            access list and then forwarded through the SOCKS agent.

        Results are delivered through defer.returnValue (NOT_DONE_YET, or
        whatever contentFinish returns); the two redirect paths end with a
        bare return.
        """
        content = ""

        request = Storage()
        request.headers = self.requestHeaders
        request.host = self.getRequestHostname()
        request.uri = self.uri

        content_length = self.getHeader(b'content-length')
        transfer_encoding = self.getHeader(b'transfer-encoding')

        # Map the request URI onto a key of antanistaticmap
        staticpath = request.uri
        staticpath = re.sub(r'/$', '/index.html', staticpath)
        staticpath = re.sub('^(/antanistaticmap/)?', '', staticpath)
        staticpath = re.sub('^/', '', staticpath)

        resource_is_local = isIPAddress(request.host) or \
                            isIPv6Address(request.host) or \
                            request.uri == '/robots.txt' or \
                            request.uri.startswith('/antanistaticmap/')

        # A request body is forwarded only when the client announced one;
        # the corresponding header is dropped from the forwarded headers.
        if content_length is not None:
            self.bodyProducer.length = int(content_length)
            producer = self.bodyProducer
            request.headers.removeHeader(b'content-length')
        elif transfer_encoding is not None:
            producer = self.bodyProducer
            request.headers.removeHeader(b'transfer-encoding')
        else:
            producer = None

        if config.mirror is not None:
            if config.basehost in config.mirror:
                config.mirror.remove(config.basehost)
            # choice() raises IndexError on an empty sequence, which happens
            # when this node was the only configured mirror.
            if config.mirror:
                self.var['mirror'] = choice(config.mirror)

        # we serve contents only over https
        if not self.isSecure():
            self.redirect("https://" + request.host + request.uri)
            self.finish()
            return

        # 0: Request admission control stage
        # we try to deny some ua/crawlers regardless of whether the request
        # is (valid or not) / (local or not); EVERY request from a user agent
        # recognized by pattern matching is denied
        user_agent = request.headers.getRawHeaders(b'user-agent')
        if config.blockcrawl and user_agent is not None:
            check = user_agent[0].lower()
            for ua in t2w.blocked_ua:
                if re.match(ua, check):
                    self.sendError(403, "error_blocked_ua.tpl")
                    defer.returnValue(NOT_DONE_YET)

        # 1: Client capability assessment stage
        accept_encoding = request.headers.getRawHeaders(b'accept-encoding')
        if accept_encoding is not None:
            if re.search('gzip', accept_encoding[0]):
                self.obj.client_supports_gzip = True

        # 2: Content delivery stage
        # we need to verify if the requested resource is local (/antanistaticmap/*) or remote
        # because some checks must be done only for remote requests;
        # in fact local content is always served (css, js, and png in fact are used in errors)
        if resource_is_local:
            # the requested resource is local, we deliver it directly
            try:
                if staticpath == "notification":

                    #################################################################
                    # Here we need to parse POST data in x-www-form-urlencoded format
                    #################################################################
                    content_receiver = BodyReceiver(defer.Deferred())
                    self.bodyProducer.startProducing(content_receiver)
                    yield self.bodyProducer.finished
                    content = ''.join(content_receiver._data)

                    args = {}

                    ctype = self.requestHeaders.getRawHeaders(b'content-type')
                    if ctype is not None:
                        ctype = ctype[0]

                    if self.method == b"POST" and ctype:
                        key, pdict = parse_header(ctype)
                        if key == b'application/x-www-form-urlencoded':
                            args.update(parse_qs(content, 1))
                    #################################################################

                    if 'by' in args and 'url' in args and 'comment' in args:
                        tmp = []
                        tmp.append("From: Tor2web Node %s.%s <%s>\n" %
                                   (config.nodename, config.basehost,
                                    config.smtpmail))
                        tmp.append("To: %s\n" %
                                   (config.smtpmailto_notifications))
                        tmp.append(
                            "Subject: Tor2web Node (IPv4 %s, IPv6 %s): notification for %s\n"
                            % (config.listen_ipv4, config.listen_ipv6,
                               args['url'][0]))
                        tmp.append(
                            "Content-Type: text/plain; charset=ISO-8859-1\n")
                        tmp.append("Content-Transfer-Encoding: 8bit\n\n")
                        tmp.append("BY: %s\n" % (args['by'][0]))
                        tmp.append("URL: %s\n" % (args['url'][0]))
                        tmp.append("COMMENT: %s\n" % (args['comment'][0]))
                        message = StringIO(''.join(tmp))
                        try:
                            sendmail(config.smtpuser, config.smtppass,
                                     config.smtpmail,
                                     config.smtpmailto_notifications, message,
                                     config.smtpdomain, config.smtpport)
                        except Exception:
                            # mail delivery is best effort
                            pass
                    # NOTE(review): nothing is returned here, so a notification
                    # request always ends in the 404 below -- confirm this is
                    # the intended reply.
                else:
                    resource = antanistaticmap[staticpath]
                    if isinstance(resource, str):
                        filename, ext = os.path.splitext(staticpath)
                        self.setHeader(b'content-type',
                                       mimetypes.types_map[ext])
                        content = resource
                        defer.returnValue(self.contentFinish(content))

                    elif isinstance(resource, PageTemplate):
                        defer.returnValue(
                            flattenString(self, resource).addCallback(
                                self.contentFinish))

            except Exception:
                # Unknown paths / extensions and similar failures fall through
                # to the 404 below.  This must not be a bare ``except:``:
                # defer.returnValue() signals its result by raising a
                # BaseException subclass, which a bare clause would swallow,
                # turning every successfully served resource into a 404.
                pass

            self.sendError(404)
            defer.returnValue(NOT_DONE_YET)

        else:
            self.obj.uri = request.uri

            if not request.host:
                self.sendError(406, 'error_invalid_hostname.tpl')
                defer.returnValue(NOT_DONE_YET)

            if config.mode == "TRANSLATION":
                self.obj.onion = config.onion
            else:
                # the first label of the host name is taken as the onion name
                self.obj.onion = request.host.split(".")[0] + ".onion"
                log.msg("detected <onion_url>.tor2web Hostname: %s" %
                        self.obj.onion)
                if not verify_onion(self.obj.onion):
                    self.sendError(406, 'error_invalid_hostname.tpl')
                    defer.returnValue(NOT_DONE_YET)

                if config.mode == "ACCESSLIST":
                    if self.obj.onion not in t2w.accesslist:
                        self.sendError(403, 'error_hs_completely_blocked.tpl')
                        defer.returnValue(NOT_DONE_YET)

                elif config.mode == "BLACKLIST":
                    # the list is matched against md5 hex digests of the
                    # onion name and of onion name + uri
                    if hashlib.md5(
                            self.obj.onion).hexdigest() in t2w.accesslist:
                        self.sendError(403, 'error_hs_completely_blocked.tpl')
                        defer.returnValue(NOT_DONE_YET)

                    if hashlib.md5(self.obj.onion +
                                   self.obj.uri).hexdigest() in t2w.accesslist:
                        self.sendError(403,
                                       'error_hs_specific_page_blocked.tpl')
                        defer.returnValue(NOT_DONE_YET)

            # we need to verify if the user is using tor;
            # on this condition it's better to redirect on the .onion
            if self.getClientIP() in t2w.TorExitNodes:
                self.redirect("http://" + self.obj.onion + request.uri)

                try:
                    self.finish()
                except Exception:
                    pass

                return

            # Avoid image hotlinking
            if request.uri.lower().endswith(('gif', 'jpg', 'png')):
                referer = request.headers.getRawHeaders(b'referer')
                if referer is not None and \
                        config.basehost not in referer[0].lower():
                    self.sendError(403)
                    defer.returnValue(NOT_DONE_YET)

            # the requested resource is remote, we act as proxy

            t2w.process_request(self.obj, request)

            parsed = urlparse(self.obj.address)

            self.var['address'] = self.obj.address
            self.var['onion'] = self.obj.onion.replace(".onion", "")
            # NOTE(review): with the stdlib urlparse, index 3 is the params
            # field and index 4 the query -- confirm which one is meant.
            self.var['path'] = parsed[2] + '?' + parsed[3]

            agent = Agent(reactor,
                          sockhost=config.sockshost,
                          sockport=config.socksport,
                          pool=self.pool)
            # the address is prefixed with 's' before being handed to the agent
            self.proxy_d = agent.request(self.method,
                                         's' + self.obj.address,
                                         self.obj.headers,
                                         bodyProducer=producer)

            self.proxy_d.addCallback(self.cbResponse)
            self.proxy_d.addErrback(self.handleError)

            defer.returnValue(NOT_DONE_YET)
Esempio n. 3
0
    def process(self):
        """
        Dispatch an incoming request: serve it locally or proxy it to the
        hidden service.

        The body uses ``yield`` together with ``defer.returnValue``, so it is
        presumably wrapped by ``defer.inlineCallbacks`` through a decorator
        that is outside this view -- confirm before changing the control flow.

        Stages, in order:
          * unless config.transport is 'HTTP', requests that are not secure
            are redirected to https and finished;
          * when config.blockcrawl is set, user agents matching one of the
            blocked_ua_list patterns receive a 403;
          * gzip support is recorded from the Accept-Encoding header;
          * local resources are served directly ("dev/null",
            "stats/yesterday", "notification", or an antanistaticmap entry);
          * everything else is checked against the configured mode and
            access_list and then forwarded through the SOCKS agent.

        Every exit goes through defer.returnValue (None, NOT_DONE_YET, or
        whatever contentFinish returns).
        """
        request = Storage()
        request.headers = self.requestHeaders
        request.host = self.getRequestHostname()
        request.uri = self.uri

        content_length = self.getHeader(b'content-length')
        transfer_encoding = self.getHeader(b'transfer-encoding')

        # Map the request URI onto a key of antanistaticmap
        staticpath = request.uri
        staticpath = re.sub('/$', '/index.html', staticpath)
        staticpath = re.sub('^(/antanistaticmap/)?', '', staticpath)
        staticpath = re.sub('^/', '', staticpath)

        resource_is_local = (config.mode != "TRANSLATION" and
                             (request.host == config.basehost or
                              request.host == 'www.' + config.basehost)) or \
                            isIPAddress(request.host) or \
                            isIPv6Address(request.host) or \
                            (config.overriderobotstxt and request.uri == '/robots.txt') or \
                            request.uri.startswith('/antanistaticmap/')

        # A request body is forwarded only when the client announced one;
        # the corresponding header is dropped from the forwarded headers.
        if content_length is not None:
            self.bodyProducer.length = int(content_length)
            producer = self.bodyProducer
            request.headers.removeHeader(b'content-length')
        elif transfer_encoding is not None:
            producer = self.bodyProducer
            request.headers.removeHeader(b'transfer-encoding')
        else:
            producer = None

        if config.mirror is not None:
            if config.basehost in config.mirror:
                config.mirror.remove(config.basehost)
            if len(config.mirror) > 1:
                self.var['mirror'] = choice(config.mirror)
            elif len(config.mirror) == 1:
                self.var['mirror'] = config.mirror[0]

        # we serve contents only over https
        if not self.isSecure() and (config.transport != 'HTTP'):
            self.redirect("https://" + request.host + request.uri)
            self.finish()
            defer.returnValue(None)

        # 0: Request admission control stage
        # we try to deny some ua/crawlers regardless of whether the request
        # is (valid or not) / (local or not); EVERY request from a user agent
        # recognized by pattern matching is denied
        user_agent = request.headers.getRawHeaders(b'user-agent')
        if config.blockcrawl and user_agent is not None:
            check = user_agent[0].lower()
            for ua in blocked_ua_list:
                if re.match(ua, check):
                    self.sendError(403, "error_blocked_ua.tpl")
                    defer.returnValue(NOT_DONE_YET)

        # 1: Client capability assessment stage
        accept_encoding = request.headers.getRawHeaders(b'accept-encoding')
        if accept_encoding is not None:
            if re.search('gzip', accept_encoding[0]):
                self.obj.client_supports_gzip = True

        # 2: Content delivery stage
        # we need to verify if the requested resource is local (/antanistaticmap/*) or remote
        # because some checks must be done only for remote requests;
        # in fact local content is always served (css, js, and png in fact are used in errors)
        if resource_is_local:
            # the requested resource is local, we deliver it directly
            try:
                if staticpath == "dev/null":
                    # a run of "A" characters of random length
                    content = "A" * random.randint(20, 1024)
                    self.setHeader(b'content-type', 'text/plain')
                    defer.returnValue(self.contentFinish(content))

                elif staticpath == "stats/yesterday":
                    self.setHeader(b'content-type', 'application/json')
                    content = yield rpc("get_yesterday_stats")
                    defer.returnValue(self.contentFinish(content))

                elif staticpath == "notification":

                    #################################################################
                    # Here we need to parse POST data in x-www-form-urlencoded format
                    #################################################################
                    content_receiver = BodyReceiver(defer.Deferred())
                    self.bodyProducer.startProducing(content_receiver)
                    yield self.bodyProducer.finished
                    content = ''.join(content_receiver._data)

                    args = {}

                    ctype = self.requestHeaders.getRawHeaders(b'content-type')
                    if ctype is not None:
                        ctype = ctype[0]

                    if self.method == b"POST" and ctype:
                        key, pdict = parse_header(ctype)
                        if key == b'application/x-www-form-urlencoded':
                            args.update(parse_qs(content, 1))
                    #################################################################

                    if 'by' in args and 'url' in args and 'comment' in args:
                        tmp = []
                        tmp.append("From: Tor2web Node %s.%s <%s>\n" % (config.nodename, config.basehost, config.smtpmail))
                        tmp.append("To: %s\n" % config.smtpmailto_notifications)
                        tmp.append("Subject: Tor2web Node (IPv4 %s, IPv6 %s): notification for %s\n" % (config.listen_ipv4, config.listen_ipv6, args['url'][0]))
                        tmp.append("Content-Type: text/plain; charset=ISO-8859-1\n")
                        tmp.append("Content-Transfer-Encoding: 8bit\n\n")
                        tmp.append("BY: %s\n" % (args['by'][0]))
                        tmp.append("URL: %s\n" % (args['url'][0]))
                        tmp.append("COMMENT: %s\n" % (args['comment'][0]))
                        message = StringIO(''.join(tmp))

                        try:
                            sendmail(config.smtpuser,
                                     config.smtppass,
                                     config.smtpmail,
                                     config.smtpmailto_notifications,
                                     message,
                                     config.smtpdomain,
                                     config.smtpport)
                        except Exception:
                            # mail delivery is best effort
                            pass

                    # the reply is an empty text/plain body whether or not a
                    # mail was sent
                    self.setHeader(b'content-type', 'text/plain')
                    defer.returnValue(self.contentFinish(''))

                else:
                    resource = antanistaticmap[staticpath]
                    if isinstance(resource, str):
                        filename, ext = os.path.splitext(staticpath)
                        self.setHeader(b'content-type', mimetypes.types_map[ext])
                        content = resource
                        defer.returnValue(self.contentFinish(content))

                    elif isinstance(resource, PageTemplate):
                        defer.returnValue(flattenString(self, resource).addCallback(self.contentFinish))

            except Exception:
                # Unknown paths / extensions and similar failures fall through
                # to the 404 below.  Keep this as ``except Exception`` (never a
                # bare ``except:``): defer.returnValue() signals its result by
                # raising a BaseException subclass that must propagate.
                pass

            self.sendError(404)
            defer.returnValue(NOT_DONE_YET)

        else:
            self.obj.uri = request.uri

            if not request.host:
                self.sendError(406, 'error_invalid_hostname.tpl')
                defer.returnValue(NOT_DONE_YET)

            if config.mode == "TRANSLATION":
                self.obj.onion = config.onion
            else:
                # the first label of the host name is taken as the onion name
                self.obj.onion = request.host.split(".")[0] + ".onion"
                rpc_log("detected <onion_url>.tor2web Hostname: %s" % self.obj.onion)
                if not verify_onion(self.obj.onion):
                    self.sendError(406, 'error_invalid_hostname.tpl')
                    defer.returnValue(NOT_DONE_YET)

                if config.mode == "ACCESSLIST":
                    # Compare the hex digest, exactly as the BLACKLIST branch
                    # does: a raw hashlib object never equals a list entry, so
                    # without .hexdigest() every onion would be rejected.
                    if hashlib.md5(self.obj.onion).hexdigest() not in access_list:
                        self.sendError(403, 'error_hs_completely_blocked.tpl')
                        defer.returnValue(NOT_DONE_YET)

                elif config.mode == "BLACKLIST":
                    if hashlib.md5(self.obj.onion).hexdigest() in access_list:
                        self.sendError(403, 'error_hs_completely_blocked.tpl')
                        defer.returnValue(NOT_DONE_YET)

                    if hashlib.md5(self.obj.onion + self.obj.uri).hexdigest() in access_list:
                        self.sendError(403, 'error_hs_specific_page_blocked.tpl')
                        defer.returnValue(NOT_DONE_YET)

            # we need to verify if the user is using tor;
            # on this condition it's better to redirect on the .onion
            if self.getClientIP() in tor_exits_list:
                self.redirect("http://" + self.obj.onion + request.uri)

                try:
                    self.finish()
                except Exception:
                    pass

                defer.returnValue(None)

            # Avoid image hotlinking
            if request.uri.lower().endswith(('gif','jpg','png')):
                referer = request.headers.getRawHeaders(b'referer')
                if referer is not None and \
                   config.basehost not in referer[0].lower():
                    self.sendError(403)
                    defer.returnValue(NOT_DONE_YET)

            # the requested resource is remote, we act as proxy

            self.process_request(request)

            parsed = urlparse(self.obj.address)

            self.var['address'] = self.obj.address
            self.var['onion'] = self.obj.onion.replace(".onion", "")
            self.var['path'] = parsed[2]
            if parsed[3] is not None and parsed[3] != '':
                self.var['path'] += '?' + parsed[3]

            agent = Agent(reactor, sockhost=config.sockshost, sockport=config.socksport, pool=self.pool)

            if config.dummyproxy is None:
                # the address is prefixed with 's' before being handed to the agent
                proxy_url = 's' + self.obj.address
            else:
                # NOTE(review): unlike self.var['path'] above, this always
                # appends '?' even when parsed[3] is empty -- confirm intent.
                proxy_url = config.dummyproxy + parsed[2] + '?' + parsed[3]

            self.proxy_d = agent.request(self.method,
                                         proxy_url,
                                         self.obj.headers, bodyProducer=producer)

            self.proxy_d.addCallback(self.cbResponse)
            self.proxy_d.addErrback(self.handleError)

            defer.returnValue(NOT_DONE_YET)
Esempio n. 4
0
    def process(self):
        """
        Dispatch an incoming request: serve it locally or proxy it to the
        hidden service.

        Stages, in order:
          * requests that are not secure are redirected to https and finished;
          * /robots.txt is answered with a disallow-all policy when
            config.blockcrawl is set;
          * user agents matching one of the t2w.blocked_ua patterns get a 403;
          * remote requests are validated (host name, access list, Tor exit
            node redirect, image hotlinking);
          * local resources are served from antanistaticmap or handled as a
            "notification"; remote ones are forwarded through the SOCKS agent.

        Returns None after a redirect or the robots.txt reply, otherwise the
        result of sendError / contentFinish / flattenString, or NOT_DONE_YET
        once the proxied request has been started.
        """
        content = ""

        request = Storage()
        request.headers = self.requestHeaders
        request.host = self.getRequestHostname()
        request.uri = self.uri

        # choice() raises IndexError on an empty sequence, so an empty mirror
        # list is treated like a missing one.
        if config.mirror:
            self.var['mirror'] = choice(config.mirror)

        # we serve contents only over https
        if not self.isSecure():
            self.redirect("https://" + request.host + request.uri)
            self.finish()
            return

        # 0: Request admission control stage
        # firstly we try to instruct spiders that honour robots.txt that we don't want to get indexed
        if request.uri == "/robots.txt" and config.blockcrawl:
            self.write("User-Agent: *\n")
            self.write("Disallow: /\n")
            self.finish()
            return

        # secondly we try to deny some ua/crawlers regardless of whether the
        # request is (valid or not) / (local or not); EVERY request from a
        # user agent recognized by pattern matching is denied
        user_agent = request.headers.getRawHeaders(b'user-agent')
        if user_agent is not None:
            check = user_agent[0].lower()
            for ua in t2w.blocked_ua:
                if re.match(ua, check):
                    return self.sendError(403, "error_blocked_ua.tpl")

        # we need to verify if the requested resource is local (/antanistaticmap/*) or remote
        # because some checks must be done only for remote requests;
        # in fact local content is always served (css, js, and png in fact are used in errors)
        # The check is evaluated once and reused for the delivery stage below
        # (assumes verify_resource_is_local has no side effects).
        is_local = verify_resource_is_local(request.host, request.uri, self.staticmap)

        if not is_local:
            # Must be set before the access-list checks: the per-page check
            # below hashes onion + uri.
            self.obj.uri = request.uri

            if not request.host:
                return self.sendError(406, 'error_invalid_hostname.tpl')

            if config.mode == "TRANSLATION":
                self.obj.onion = config.onion
            else:
                # the first label of the host name is taken as the onion name
                self.obj.onion = request.host.split(".")[0] + ".onion"
                log.msg("detected <onion_url>.tor2web Hostname: %s" % self.obj.onion)
                if not verify_onion(self.obj.onion):
                    return self.sendError(406, 'error_invalid_hostname.tpl')

                if config.mode == "ACCESSLIST":
                    if self.obj.onion not in self.accesslist:
                        return self.sendError(403, 'error_hs_completely_blocked.tpl')

                # NOTE(review): confirm the mode really is spelled "BLOCKLIST"
                # in the configuration this method runs against.
                elif config.mode == "BLOCKLIST":
                    if hashlib.md5(self.obj.onion).hexdigest() in self.accesslist:
                        return self.sendError(403, 'error_hs_completely_blocked.tpl')

                    if hashlib.md5(self.obj.onion + self.obj.uri).hexdigest() in self.accesslist:
                        return self.sendError(403, 'error_hs_specific_page_blocked.tpl')

            # we need to verify if the user is using tor;
            # on this condition it's better to redirect on the .onion
            if self.getClientIP() in t2w.TorExitNodes:
                self.redirect("http://" + self.obj.onion + request.uri)
                self.finish()
                return

            # Avoid image hotlinking
            if request.uri.lower().endswith(('gif','jpg','png')):
                referer = request.headers.getRawHeaders(b'referer')
                if referer is not None and config.basehost not in referer[0].lower():
                    return self.sendError(403)

        self.setHeader(b'strict-transport-security', b'max-age=31536000')

        # 1: Client capability assessment stage
        accept_encoding = request.headers.getRawHeaders(b'accept-encoding')
        if accept_encoding is not None:
            if re.search('gzip', accept_encoding[0]):
                self.obj.client_supports_gzip = True

        # 2: Content delivery stage
        if is_local:
            # the requested resource is local, we deliver it directly
            try:
                # Map the request URI onto a key of antanistaticmap
                staticpath = request.uri
                staticpath = re.sub(r'/$', '/index.html', staticpath)
                staticpath = re.sub('^('+self.staticmap+')?', '', staticpath)
                staticpath = re.sub('^/', '', staticpath)
                if staticpath in antanistaticmap:
                    resource = antanistaticmap[staticpath]
                    if isinstance(resource, str):
                        filename, ext = os.path.splitext(staticpath)
                        self.setHeader(b'content-type', mimetypes.types_map[ext])
                        content = resource
                    elif isinstance(resource, PageTemplate):
                        return flattenString(self, resource).addCallback(self.contentFinish)
                elif staticpath == "notification":
                    if 'by' in self.args and 'url' in self.args and 'comment' in self.args:
                        tmp = []
                        tmp.append("From: Tor2web Node %s.%s <%s>\n" % (config.nodename, config.basehost, config.smtpmail))
                        tmp.append("To: %s\n" % (config.smtpmailto_notifications))
                        tmp.append("Subject: Tor2web Node (IPv4 %s, IPv6 %s): notification for %s\n" % (config.listen_ipv4, config.listen_ipv6, self.args['url'][0]))
                        tmp.append("Content-Type: text/plain; charset=ISO-8859-1\n")
                        tmp.append("Content-Transfer-Encoding: 8bit\n\n")
                        tmp.append("BY: %s\n" % (self.args['by'][0]))
                        tmp.append("URL: %s\n" % (self.args['url'][0]))
                        tmp.append("COMMENT: %s\n" % (self.args['comment'][0]))
                        message = StringIO(''.join(tmp))
                        sendmail(config.smtpuser, config.smtppass, config.smtpmail, config.smtpmailto_notifications, message, config.smtpdomain, config.smtpport)
                    else:
                        return self.sendError(404)

            except Exception:
                # any failure while resolving the resource is reported as 404
                return self.sendError(404)

            return self.contentFinish(content)

        else:
            # the requested resource is remote, we act as proxy

            t2w.process_request(self.obj, request)

            # The values extracted here are not used afterwards; the block
            # only checks that the address can be split into host and port
            # and answers 400 when it cannot.
            try:
                parsed = urlparse(self.obj.address)
                protocol = parsed[0]
                host = parsed[1]
                if ':' in host:
                    host, port = host.split(":")
                    port = int(port)
                else:
                    port = self.ports[protocol]

            except Exception:
                return self.sendError(400, "error_invalid_hostname.tpl")

            dest = client._parse(self.obj.address) # scheme, host, port, path

            self.var['onion'] = self.obj.onion
            self.var['path'] = dest[3]

            content_length = self.getHeader(b'content-length')
            # NOTE(review): content_length is the raw header value, so the
            # ">= 0" comparison is not a numeric check -- confirm whether
            # BodyProducer expects an int here.
            if content_length is not None and content_length >= 0:
                bodyProducer = BodyProducer(self.content,
                                            content_length)

                request.headers.removeHeader(b'content-length')
            else:
                bodyProducer = None

            agent = Agent(reactor, sockhost=config.sockshost, sockport=config.socksport, pool=self.pool)
            d = agent.request(self.method, 'shttp://'+dest[1]+dest[3],
                    self.obj.headers, bodyProducer=bodyProducer)

            d.addCallback(self.cbResponse)
            d.addErrback(self.handleError)

            return NOT_DONE_YET