def main():
    """Fetch the URL given as the first command-line argument via SOCKS.

    The target host and port are taken from ``client._parse(url)`` and
    reached through a ``SOCKSWrapper`` pointed at ``localhost:1080``.
    ``clientcb`` is attached to the HTTP factory's deferred and
    ``wrappercb`` to the deferred returned by the wrapper's ``connect``.
    The reactor is then run.
    """
    url = sys.argv[1]

    # client._parse yields (scheme, host, port, path); only host and port
    # are needed to build the TCP endpoint.
    parsed = client._parse(url)
    target_host, target_port = parsed[1], parsed[2]

    # Only the host and port slots of the proxy description are used here.
    proxy = (None, 'localhost', 1080, True, None, None)
    socks_host, socks_port = proxy[1], proxy[2]

    target = endpoints.TCP4ClientEndpoint(reactor, target_host, target_port)
    socks = SOCKSWrapper(reactor, socks_host, socks_port, target)

    factory = client.HTTPClientFactory(url)
    factory.deferred.addCallback(clientcb)

    socks.connect(factory).addCallback(wrappercb)
    reactor.run()
class Client(Process):
    """Process that opens ``num_circuits`` connections through a SOCKS proxy.

    The instance builds one ``clFactory`` and one ``SOCKSWrapper`` (SOCKS
    proxy at ``localhost:9050``, target ``ip:port``) and starts itself at
    the end of construction.
    """

    def __init__(self, port, ip, num_circuits, num_measures, time_interval,
                 file_prefix, queue):
        """Store the settings, build the factory/wrapper and start the process.

        ``num_measures``, ``time_interval``, ``file_prefix``,
        ``num_circuits`` and ``queue`` are forwarded to ``clFactory``;
        ``ip`` and ``port`` identify the TCP target reached via the proxy.
        """
        Process.__init__(self)

        self.num_circuits = num_circuits
        self.queue = queue

        self.factory = clFactory(
            num_measures, time_interval, file_prefix, num_circuits, queue)

        target = endpoints.TCP4ClientEndpoint(reactor, ip, port)
        self.s = SOCKSWrapper(reactor, "localhost", 9050, target)

        # Must stay last: run() relies on everything assigned above.
        self.start()

    def run(self):
        """Issue one connect per circuit, then run the reactor."""
        Process.run(self)

        remaining = self.num_circuits
        while remaining > 0:
            self.s.connect(self.factory)
            remaining -= 1

        reactor.run()
def sockswrapper(self, proxy, url):
    """Build a ``SOCKSWrapper`` that reaches the host of *url* via *proxy*.

    Args:
        proxy: Indexable proxy description; ``proxy[1]`` and ``proxy[2]``
            are passed to ``SOCKSWrapper`` as the proxy host and port.
        url: Target URL. It must contain an explicit port number.

    Returns:
        A ``SOCKSWrapper`` around a ``TCP4ClientEndpoint`` for the URL's
        hostname and port, constructed with ``self.timestamps``.

    Raises:
        AssertionError: If *url* does not specify a port.
    """
    dest = urlparse(url)

    # Raised explicitly instead of using ``assert`` so the validation is not
    # stripped when Python runs with -O. AssertionError and the message are
    # kept so existing callers observe the same failure.
    if dest.port is None:
        raise AssertionError('Must specify port number.')

    endpoint = endpoints.TCP4ClientEndpoint(reactor, dest.hostname, dest.port)
    return SOCKSWrapper(reactor, proxy[1], proxy[2], endpoint, self.timestamps)
def __init__(self, port, ip, num_circuits, num_measures, time_interval,
             file_prefix, queue):
    """Store the settings, build the factory/wrapper and start the process.

    ``num_measures``, ``time_interval``, ``file_prefix``, ``num_circuits``
    and ``queue`` are forwarded to ``clFactory``; ``ip`` and ``port``
    identify the TCP target reached through the SOCKS proxy at
    ``localhost:9050``.
    """
    Process.__init__(self)

    self.num_circuits = num_circuits
    self.queue = queue

    self.factory = clFactory(
        num_measures, time_interval, file_prefix, num_circuits, queue)

    target = endpoints.TCP4ClientEndpoint(reactor, ip, port)
    self.s = SOCKSWrapper(reactor, "localhost", 9050, target)

    # Must stay last: the process is started once everything above is set.
    self.start()
def process(self):
    """Handle one incoming request: filter it, then serve or proxy it.

    Stages, in order:

    0. Admission control. ``/robots.txt`` is answered directly when
       ``config.blockcrawl`` is set; blocked user agents get a 403. For
       non-local resources the hostname is validated, clients listed in
       ``t2w.TorExitNodes`` are redirected, and image requests whose
       referer does not contain ``config.basehost`` are refused. Requests
       that are not secure are redirected to HTTPS.
    1. Client capability assessment (gzip and keep-alive flags on
       ``self.obj``).
    2. Content delivery. Local resources are served from
       ``antanistaticmap`` (or trigger a notification mail); remote ones
       are forwarded through a ``SOCKSWrapper`` connection.

    Returns:
        ``NOT_DONE_YET`` when the request was handed to the proxy
        connection, the result of ``self.error`` / ``self.contentFinish``
        (or a deferred from ``flattenString``) where those are returned,
        and ``None`` when the response was finished here or an unexpected
        exception was reported through ``MailException``.
    """
    try:
        content = ""

        request = Storage()
        request.headers = self.getAllHeaders().copy()
        request.host = request.headers.get('host')
        request.uri = self.uri
        request.resourceislocal = False

        # 0: Request admission control stage
        # First, tell spiders that honour robots.txt that we do not want
        # to be indexed.
        if request.uri == "/robots.txt" and config.blockcrawl:
            self.write("User-Agent: *\n")
            self.write("Disallow: /\n")
            self.finish()
            return

        # Second, deny known user agents regardless of whether the request
        # is valid or local.
        if request.headers.get('user-agent') in t2w.blocked_ua:
            return self.error(403, "error_blocked_ua.xml")

        # Determine whether the resource is local or remote, because some
        # checks apply to remote requests only; local content (css, js,
        # png used by error pages) is always served.
        if request.host == config.listen_ip:
            request.resourceislocal = True
        else:
            request.resourceislocal = request.uri.startswith(self.staticmap)

        if not request.resourceislocal:
            # Validate the request early to avoid useless processing.
            if not request.host:
                return self.error(400, "error_invalid_hostname.xml")

            if not t2w.verify_hostname(self.obj, request.host, request.uri):
                return self.error(self.obj.error['code'],
                                  self.obj.error['template'])

            # Clients already coming from Tor are redirected to the
            # hidden service itself.
            if self.getClientIP() in t2w.TorExitNodes:
                self.redirect("http://" + self.obj.hostname + request.uri)
                self.finish()
                return

            # Pattern matching checks for early request refusal; future
            # checks for denied content belong in this stage.
            if request.uri.lower().endswith(('gif', 'jpg', 'png')):
                # Avoid image hotlinking.
                referer = request.headers.get('referer')
                if referer is None or config.basehost not in referer.lower():
                    return self.error(403)

        # Content is served over https only.
        if not self.isSecure():
            self.redirect("https://" + self.getRequestHostname() + request.uri)
            self.finish()
            return

        self.setHeader('strict-transport-security', 'max-age=31536000')

        # 1: Client capability assessment stage
        accept_encoding = request.headers.get('accept-encoding')
        if accept_encoding is not None and 'gzip' in accept_encoding:
            self.obj.client_supports_gzip = True

        connection = request.headers.get('connection')
        if connection is not None and 'keep-alive' in connection:
            self.obj.client_supports_keepalive = True

        # 2: Content delivery stage
        if request.resourceislocal:
            # The requested resource is local; deliver it directly.
            try:
                staticpath = re.sub(r'/$', '/index.html', request.uri)
                # staticmap is a literal path prefix (see the startswith
                # check above), so escape it before using it in a regex.
                staticpath = re.sub(
                    '^(' + re.escape(self.staticmap) + ')?', '', staticpath)
                staticpath = re.sub(r'^/', '', staticpath)

                if staticpath in antanistaticmap:
                    resource = antanistaticmap[staticpath]
                    if isinstance(resource, str):
                        ext = os.path.splitext(staticpath)[1]
                        self.setHeader('content-type',
                                       mimetypes.types_map[ext])
                        content = resource
                    elif isinstance(resource, PageTemplate):
                        return flattenString(None, resource).addCallback(
                            self.contentFinish)
                elif staticpath.startswith("notification"):
                    if ('by' in self.args and 'url' in self.args
                            and 'comment' in self.args):
                        reported_by = self.args['by'][0]
                        reported_url = self.args['url'][0]
                        comment = self.args['comment'][0]

                        # The URL is client-supplied and ends up in the
                        # Subject header: replace CR/LF so it cannot
                        # inject additional mail headers.
                        subject_url = reported_url.replace(
                            '\r', ' ').replace('\n', ' ')

                        message = ""
                        message += "From: Tor2web Node %s <%s>\n" % (
                            config.listen_ip, config.smtpmail)
                        message += "To: %s\n" % (
                            config.smtpmailto_notifications)
                        message += "Subject: Tor2web Node %s: notification for %s\n" % (
                            config.listen_ip, subject_url)
                        message += "Content-Type: text/plain; charset=ISO-8859-1\n"
                        message += "Content-Transfer-Encoding: 8bit\n\n"
                        message += "BY: %s\n" % (reported_by)
                        message += "URL: %s\n" % (reported_url)
                        message += "COMMENT: %s\n" % (comment)
                        message = StringIO(message)
                        sendmail(config.smtpuser, config.smtppass,
                                 config.smtpmail,
                                 config.smtpmailto_notifications, message,
                                 config.smtpdomain, config.smtpport)
                else:
                    return self.error(404)

            except Exception:
                # Any failure while resolving a local resource (unknown
                # extension, missing argument value, ...) is reported to
                # the client as "not found".
                return self.error(404)

            return self.contentFinish(content)

        else:
            # The requested resource is remote; act as a proxy.
            if not t2w.process_request(self.obj, request):
                return self.error(self.obj.error['code'],
                                  self.obj.error['template'])

            parsed = urlparse.urlparse(self.obj.address)
            protocol = parsed[0]

            # Everything after scheme and netloc is what gets requested
            # from the remote side.
            self.rest = urlparse.urlunparse(('', '') + parsed[2:])
            if not self.rest:
                self.rest = "/"

            class_ = self.protocols[protocol]

            # Host and port for the connection come from client._parse,
            # which yields (scheme, host, port, path).
            dest = client._parse(self.obj.address)
            endpoint = endpoints.TCP4ClientEndpoint(reactor, dest[1], dest[2])
            wrapper = SOCKSWrapper(reactor, config.sockshost,
                                   config.socksport, endpoint)
            f = class_(self.method, self.rest, self.clientproto,
                       self.obj.headers, content, self, self.obj)
            d = wrapper.connect(f)
            d.addErrback(self.sockserror)

            return NOT_DONE_YET

    except Exception:
        # Top-level boundary: report unexpected errors by mail instead of
        # letting them propagate.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        MailException(exc_type, exc_value, exc_traceback)
def process(self):
    """Handle one incoming request: filter it, then serve or proxy it.

    Stages, in order:

    0. Admission control. ``/robots.txt`` is answered directly when
       ``config.blockcrawl`` is set; blocked user agents get a 403. For
       non-local resources the hostname is validated, clients listed in
       ``t2w.TorExitNodes`` are redirected, and image requests whose
       referer does not contain ``config.basehost`` are refused. Requests
       that are not secure are redirected to HTTPS.
    1. Client capability assessment (gzip and keep-alive flags on
       ``self.obj``).
    2. Content delivery. Local resources are served from
       ``antanistaticmap`` (or trigger a notification mail); remote ones
       are forwarded through a ``SOCKSWrapper`` connection.

    Returns:
        ``NOT_DONE_YET`` when the request was handed to the proxy
        connection, the result of ``self.error`` / ``self.contentFinish``
        (or a deferred from ``flattenString``) where those are returned,
        and ``None`` when the response was finished here or an unexpected
        exception was reported through ``MailException``.
    """
    try:
        content = ""

        request = Storage()
        request.headers = self.getAllHeaders().copy()
        request.host = request.headers.get('host')
        request.uri = self.uri
        request.resourceislocal = False

        # 0: Request admission control stage
        # First, tell spiders that honour robots.txt that we do not want
        # to be indexed.
        if request.uri == "/robots.txt" and config.blockcrawl:
            self.write("User-Agent: *\n")
            self.write("Disallow: /\n")
            self.finish()
            return

        # Second, deny known user agents regardless of whether the request
        # is valid or local.
        if request.headers.get('user-agent') in t2w.blocked_ua:
            return self.error(403, "error_blocked_ua.xml")

        # Determine whether the resource is local or remote, because some
        # checks apply to remote requests only; local content (css, js,
        # png used by error pages) is always served.
        if request.host == config.listen_ip:
            request.resourceislocal = True
        else:
            request.resourceislocal = request.uri.startswith(self.staticmap)

        if not request.resourceislocal:
            # Validate the request early to avoid useless processing.
            if not request.host:
                return self.error(400, "error_invalid_hostname.xml")

            if not t2w.verify_hostname(self.obj, request.host, request.uri):
                return self.error(self.obj.error['code'],
                                  self.obj.error['template'])

            # Clients already coming from Tor are redirected to the
            # hidden service itself.
            if self.getClientIP() in t2w.TorExitNodes:
                self.redirect("http://" + self.obj.hostname + request.uri)
                self.finish()
                return

            # Pattern matching checks for early request refusal; future
            # checks for denied content belong in this stage.
            if request.uri.lower().endswith(('gif', 'jpg', 'png')):
                # Avoid image hotlinking.
                referer = request.headers.get('referer')
                if referer is None or config.basehost not in referer.lower():
                    return self.error(403)

        # Content is served over https only.
        if not self.isSecure():
            self.redirect("https://" + self.getRequestHostname() + request.uri)
            self.finish()
            return

        self.setHeader('strict-transport-security', 'max-age=31536000')

        # 1: Client capability assessment stage
        accept_encoding = request.headers.get('accept-encoding')
        if accept_encoding is not None and 'gzip' in accept_encoding:
            self.obj.client_supports_gzip = True

        connection = request.headers.get('connection')
        if connection is not None and 'keep-alive' in connection:
            self.obj.client_supports_keepalive = True

        # 2: Content delivery stage
        if request.resourceislocal:
            # The requested resource is local; deliver it directly.
            try:
                staticpath = re.sub(r'/$', '/index.html', request.uri)
                # staticmap is a literal path prefix (see the startswith
                # check above), so escape it before using it in a regex.
                staticpath = re.sub(
                    '^(' + re.escape(self.staticmap) + ')?', '', staticpath)
                staticpath = re.sub(r'^/', '', staticpath)

                if staticpath in antanistaticmap:
                    resource = antanistaticmap[staticpath]
                    if isinstance(resource, str):
                        ext = os.path.splitext(staticpath)[1]
                        self.setHeader('content-type',
                                       mimetypes.types_map[ext])
                        content = resource
                    elif isinstance(resource, PageTemplate):
                        return flattenString(None, resource).addCallback(
                            self.contentFinish)
                elif staticpath.startswith("notification"):
                    if ('by' in self.args and 'url' in self.args
                            and 'comment' in self.args):
                        reported_by = self.args['by'][0]
                        reported_url = self.args['url'][0]
                        comment = self.args['comment'][0]

                        # The URL is client-supplied and ends up in the
                        # Subject header: replace CR/LF so it cannot
                        # inject additional mail headers.
                        subject_url = reported_url.replace(
                            '\r', ' ').replace('\n', ' ')

                        message = ""
                        message += "From: Tor2web Node %s <%s>\n" % (
                            config.listen_ip, config.smtpmail)
                        message += "To: %s\n" % (
                            config.smtpmailto_notifications)
                        message += "Subject: Tor2web Node %s: notification for %s\n" % (
                            config.listen_ip, subject_url)
                        message += "Content-Type: text/plain; charset=ISO-8859-1\n"
                        message += "Content-Transfer-Encoding: 8bit\n\n"
                        message += "BY: %s\n" % (reported_by)
                        message += "URL: %s\n" % (reported_url)
                        message += "COMMENT: %s\n" % (comment)
                        message = StringIO(message)
                        sendmail(config.smtpuser, config.smtppass,
                                 config.smtpmail,
                                 config.smtpmailto_notifications, message,
                                 config.smtpdomain, config.smtpport)
                else:
                    return self.error(404)

            except Exception:
                # Any failure while resolving a local resource (unknown
                # extension, missing argument value, ...) is reported to
                # the client as "not found".
                return self.error(404)

            return self.contentFinish(content)

        else:
            # The requested resource is remote; act as a proxy.
            if not t2w.process_request(self.obj, request):
                return self.error(self.obj.error['code'],
                                  self.obj.error['template'])

            parsed = urlparse.urlparse(self.obj.address)
            protocol = parsed[0]

            # Everything after scheme and netloc is what gets requested
            # from the remote side.
            self.rest = urlparse.urlunparse(('', '') + parsed[2:])
            if not self.rest:
                self.rest = "/"

            class_ = self.protocols[protocol]

            # Host and port for the connection come from client._parse,
            # which yields (scheme, host, port, path).
            dest = client._parse(self.obj.address)
            endpoint = endpoints.TCP4ClientEndpoint(reactor, dest[1], dest[2])
            wrapper = SOCKSWrapper(reactor, config.sockshost,
                                   config.socksport, endpoint)
            f = class_(self.method, self.rest, self.clientproto,
                       self.obj.headers, content, self, self.obj)
            d = wrapper.connect(f)
            d.addErrback(self.sockserror)

            return NOT_DONE_YET

    except Exception:
        # Top-level boundary: report unexpected errors by mail instead of
        # letting them propagate.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        MailException(exc_type, exc_value, exc_traceback)