def proxy_crawl(self, url, ip, reactor):
    host, port = ip.split(':')
    endpoint = TCP4ClientEndpoint(reactor, host, int(port),
                                  timeout=self.settings.get('DOWNLOAD_TIMEOUT', 10))
    agent = ProxyAgent(endpoint)
    response = agent.request(b"GET", url.encode('ascii'), Headers(self.headers))
    return response
def _asynchronous_check(self, item):
    logger.debug('checking...')
    from twisted.internet import reactor  # must be imported here
    endpoint = HostnameEndpoint(reactor, item[0], int(item[1]))
    agent = ProxyAgent(endpoint)
    headers = {'User-Agent': ['Mozilla/5.0']}
    headers = Headers(headers)  # wrap the dict in a Headers object
    cd = agent.request(b'GET', b'https://www.baidu.com/', headers=headers)
    # Enforce the intended 3-second timeout by cancelling the Deferred.
    reactor.callLater(3, cd.cancel)

    def check_code(response, **kwargs):
        if response.code < 300:
            logger.info('valid ip!')
            return kwargs.pop('item', None)
        else:
            raise Exception('invalid')

    def err(f):
        logger.debug(f)
        return f

    cd.addCallbacks(check_code, err, callbackKeywords={'item': item})
    return cd
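# The check above inspects response.code but never consumes the response body,
# which can leave the proxied connection hanging. A hedged variant of the
# callback that drains the body via twisted.web.client.readBody before
# returning the item (an assumption about the intended behaviour, not part of
# the original) might look like this:
from twisted.web.client import readBody

def check_code_and_drain(response, item=None):
    d = readBody(response)

    def _done(_body):
        # The body has been read and discarded; the connection is released.
        if response.code < 300:
            logger.info('valid ip!')
            return item
        raise Exception('invalid')

    d.addCallback(_done)
    return d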
def main():
    # ProxyAgent needs a real client endpoint pointing at the proxy;
    # the localhost:8000 address here is a hypothetical placeholder.
    endpoint = TCP4ClientEndpoint(reactor, "localhost", 8000)
    agent = ProxyAgent(endpoint)
    d = agent.request(b"GET", b"http://echo.ioloop.io/a/b?format=json")
    d.addCallbacks(display, err)
    d.addCallback(lambda ignored: reactor.stop())
    reactor.run()
def get_device_from_smcloud(self, user_agent):
    """
    Queries ScientiaMobile's API and returns a dictionary of the device.
    """
    # Create the basic auth string.
    b64 = base64.encodestring(self.smcloud_api_key).strip()
    if self.http_proxy_host:
        headers = {
            "X-Cloud-Client": [self.SMCLOUD_CONFIG["client_version"]],
            "Authorization": ["Basic %s" % b64],
            "User-Agent": [str(user_agent)],
        }
        if self.http_proxy_username and self.http_proxy_password:
            auth = base64.encodestring(
                "%s:%s" % (self.http_proxy_username, self.http_proxy_password)).strip()
            # Cater for the various header spellings used by proxy servers.
            headers["Proxy-Authorization"] = ["Basic %s" % auth]
            headers["Proxy-Authenticate"] = ["Basic %s" % auth]
            headers["Proxy-Authentication"] = ["Basic %s" % auth]
        endpoint = TCP4ClientEndpoint(reactor, self.http_proxy_host,
                                      self.http_proxy_port or 80, timeout=5)
        agent = ProxyAgent(endpoint)
        response = yield agent.request("GET", self.SMCLOUD_CONFIG["url"],
                                       headers=Headers(headers))
        if response.code != 200:
            raise ProxyConnectError()
        d = Deferred()
        response.deliverBody(SimpleReceiver(d))
        body = yield d
    else:
        headers = {"X-Cloud-Client": self.SMCLOUD_CONFIG["client_version"],
                   "Authorization": "Basic %s" % b64}
        try:
            body = yield getPage(self.SMCLOUD_CONFIG["url"], headers=headers,
                                 agent=user_agent, timeout=5)
        except ConnectError as exc:
            raise ScientiaMobileCloudHandlerConnectError(exc)
def doRequest(noreason):
    proxyEndpoint = TCP4ClientEndpoint(reactor, '127.0.0.1', 8787)
    agent = ProxyAgent(proxyEndpoint, reactor)
    log.msg("Doing HTTP request via Lantern (127.0.0.1:8787) for %s" % self.url)
    request = agent.request("GET", self.url)
    request.addCallback(readBody)
    request.addCallback(addResultToReport)
    request.addCallback(self.processDirector.close)
    return request
def request_echo(self, content):
    """
    Send a GET request asking the test server to echo the content.
    """
    ouinet_client_endpoint = TCP4ClientEndpoint(
        reactor, "127.0.0.1", TestFixtures.FIRST_CLIENT["port"])
    agent = ProxyAgent(ouinet_client_endpoint)
    return agent.request(
        "GET",
        "http://127.0.0.1:" + str(TestFixtures.TEST_HTTP_SERVER_PORT)
        + "/?content=" + content)
def process(self):
    host = self.getHeader('Host')
    if not host:
        log.err("No host header given")
        self.setResponseCode(400)
        self.finish()
        return

    port = 80
    if self.isSecure():
        port = 443
    if ':' in host:
        host, port = host.split(':')
        port = int(port)
    log.msg("self: %s" % (self,))
    log.msg("host:port: %s:%s" % (host, port))
    self.setHost(host, port)

    self.content.seek(0, 0)
    postData = self.content.read()

    endpoint = TCP4ClientEndpoint(self.reactor, os.environ.get("PROXY_HOST"),
                                  int(os.environ.get("PROXY_PORT")))
    agent = ProxyAgent(endpoint)

    scheme = b"http"
    if self.isSecure():
        scheme = b"https"
    url = scheme + b"://" + str.encode(host) + b":" + str.encode(str(port)) + self.uri
    if self.getUser() is not None and self.getPassword() is not None:
        userpw = self.getUser() + b":" + self.getPassword()
        url = scheme + b"://" + userpw + b"@" + str.encode(host) + b":" + str.encode(str(port)) + self.uri
    log.msg("URL: %s" % (url,))
    log.msg("Method: %s" % (self.method,))

    headers = {}
    for key, values in self.requestHeaders.getAllRawHeaders():
        if key.lower() == "connection":
            headers[key] = ['close']
        elif key.lower() == 'keep-alive':
            continue  # skip hop-by-hop keep-alive headers
        else:
            headers[key] = values

    if self.method in (b"POST", b"PUT"):
        log.msg("POST Data: %s" % (postData,))
        body = BytesProducer(postData)
        d = agent.request(self.method, url, Headers(headers), body)
    else:
        d = agent.request(self.method, url, Headers(headers))
    d.addCallback(self.forwardToClient)
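# BytesProducer is used above but not defined in this snippet. A minimal
# sketch of an IBodyProducer that writes one fixed byte string (the class name
# matches the usage above; the implementation itself is an assumption) could
# look like this:
from twisted.internet import defer
from twisted.web.iweb import IBodyProducer
from zope.interface import implementer

@implementer(IBodyProducer)
class BytesProducer(object):
    def __init__(self, body):
        self.body = body
        self.length = len(body)  # a known length lets the agent set Content-Length

    def startProducing(self, consumer):
        consumer.write(self.body)
        return defer.succeed(None)

    def pauseProducing(self):
        pass

    def stopProducing(self):
        pass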
def _request(self, request, method):
    """
    Helper method that sends the given HTTP request.
    """
    # Copy the headers from the request.
    headers = Headers()
    for (key, value) in request.headers.iteritems():
        headers.addRawHeader(key, value)

    # If a username and password are given, add basic authentication.
    if (self.options.username is not None and
            self.options.password is not None):
        auth = "%s:%s" % (self.options.username, self.options.password)
        auth = auth.encode("base64").strip()
        headers.addRawHeader('Authorization', 'Basic ' + auth)

    # Determine whether the user has configured a proxy server.
    url_parts = urlparse.urlparse(request.url)
    proxy = self.options.proxy.get(url_parts.scheme, None)

    # Construct an agent to send the request.
    if proxy is not None:
        (hostname, port) = proxy.split(":")
        endpoint = TCP4ClientEndpoint(reactor, hostname, int(port),
                                      timeout=self.options.timeout)
        agent = ProxyAgent(endpoint)
    else:
        agent = Agent(reactor, self.httpsPolicy,
                      connectTimeout=self.options.timeout)

    url = request.url.encode("utf-8")
    producer = StringProducer(request.message or "")
    response = yield agent.request(method, url, headers, producer)

    # If the initial request returned a redirection response, attempt to follow it.
    http_redirect_codes = [http.MOVED_PERMANENTLY,   # 301
                           http.FOUND,               # 302
                           http.SEE_OTHER,           # 303
                           http.TEMPORARY_REDIRECT]  # 307
    if (response.code in http_redirect_codes and
            response.headers.hasHeader("Location")):
        new_url = response.headers.getRawHeaders("Location")[0]
        producer = StringProducer(request.message or "")
        response = yield agent.request(method, new_url, headers, producer)

    # Construct a simple response consumer and give it the response body.
    consumer = StringResponseConsumer()
    response.deliverBody(consumer)
    yield consumer.getDeferred()
    consumer.response = response
    defer.returnValue(consumer)
def agent(self, scheme='http'):
    if not self._agents:
        pool = HTTPConnectionPool(reactor)
        pool.maxPersistentPerHost = 10
        pool.cachedConnectionTimeout = 15
        contextFactory = PermissiveBrowserLikePolicyForHTTPS()
        proxies = getproxies()
        if 'http' in proxies or 'https' in proxies:
            # Intermittent failures (ResponseNeverReceived) have been seen when
            # POSTing through a proxy with persistent connections enabled.
            pool.persistent = False

        if 'https' in proxies:
            proxy = urlparse(proxies.get('https'))
            if proxy:
                # Note: this isn't going to work completely. It's not being
                # passed the modified contextFactory, and in fact it doesn't
                # even work properly for other reasons (ZPS-2061).
                log.info("Creating https proxy (%s:%s)" % (proxy.hostname, proxy.port))
                endpoint = TCP4ClientEndpoint(reactor, proxy.hostname, proxy.port,
                                              timeout=CONNECT_TIMEOUT)
                SessionManager._agents['https'] = \
                    ProxyAgent(endpoint, reactor, pool=pool)
        else:
            SessionManager._agents['https'] = \
                Agent(reactor, pool=pool, connectTimeout=CONNECT_TIMEOUT,
                      contextFactory=contextFactory)

        if 'http' in proxies:
            proxy = urlparse(proxies.get('http'))
            if proxy:
                log.info("Creating http proxy (%s:%s)" % (proxy.hostname, proxy.port))
                endpoint = TCP4ClientEndpoint(reactor, proxy.hostname, proxy.port,
                                              timeout=CONNECT_TIMEOUT)
                SessionManager._agents['http'] = \
                    ProxyAgent(endpoint, reactor, pool=pool)
        else:
            SessionManager._agents['http'] = \
                Agent(reactor, pool=pool, connectTimeout=CONNECT_TIMEOUT)

    return SessionManager._agents[scheme]
def build_agent(req):
    uri = URI.fromBytes(req.url)
    proxy = req.get('proxy')
    if req.get('use_proxy') is False:
        proxy = None
    if proxy:
        if uri.scheme == 'https':
            agent = TunnelingAgent(
                reactor=reactor, proxy=proxy,
                contextFactory=ScrapexClientContextFactory(),
                connectTimeout=req.get('timeout'))
        else:
            endpoint = TCP4ClientEndpoint(reactor, host=proxy.host,
                                          port=proxy.port,
                                          timeout=req.get('timeout'))
            agent = ProxyAgent(endpoint)

        if proxy.auth_header:
            req.get('headers')['Proxy-Authorization'] = proxy.auth_header
    else:
        agent = Agent(reactor)

    agent = RedirectAgent(agent, redirectLimit=3)
    agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])
    return agent
def test_lantern_circumvent(self):
    proxyEndpoint = TCP4ClientEndpoint(reactor, '127.0.0.1', 8787)
    agent = ProxyAgent(proxyEndpoint, reactor)

    def addResultToReport(result):
        self.report['body'] = result
        self.report['success'] = True

    def addFailureToReport(failure):
        self.report['failure'] = handleAllFailures(failure)
        self.report['success'] = False

    def doRequest(noreason):
        log.debug("Doing HTTP request via Lantern (127.0.0.1:8787) for %s"
                  % self.url)
        request = agent.request("GET", self.url)
        request.addCallback(readBody)
        request.addCallback(addResultToReport)
        request.addCallback(self.processDirector.close)
        return request

    self.processDirector.bootstrapped.addCallback(doRequest)
    self.processDirector.bootstrapped.addErrback(addFailureToReport)
    self.runLantern()
    return self.d
def request(method, url, headers, producer=None, use_tor=False):
    """Make an HTTP request and return a deferred."""
    if use_tor:
        host = urlparse(url).netloc
        port = config.Application().tor_socks_port
        proxy = TCP4ClientEndpoint(reactor, "localhost", int(port))
        agent = ProxyAgent(SOCKS5ClientEndpoint(host, 80, proxy))
    else:
        agent = Agent(reactor)

    if headers is None:
        headers = Headers({'User-Agent': ['Twisted Web Client']})

    log.msg('Using TOR network' if use_tor else 'Using standard network')
    log.msg('Request URL: {}'.format(url))
    return agent.request(method, url, headers, producer)
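# A minimal usage sketch for the helper above, assuming SOCKS5ClientEndpoint
# comes from txsocksx and a Tor SOCKS proxy is listening on the configured
# port (example.com is purely illustrative):
from twisted.internet import reactor
from twisted.web.client import readBody

d = request(b'GET', b'http://example.com/', None, use_tor=True)
d.addCallback(readBody)
d.addCallback(lambda body: log.msg('fetched {} bytes'.format(len(body))))
d.addBoth(lambda _: reactor.stop())
reactor.run()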
def _create_agent(self, req):
    """Create the right agent for a specific request."""
    agent = None
    uri = URI.fromBytes(req.url)
    proxy = req.get('proxy')
    if req.get('use_proxy') is False:
        proxy = None
    if proxy:
        if uri.scheme == 'https':
            agent_key = 'httpsproxy-%s-%s' % (proxy.host, proxy.port)
            agent = self._agents.get(agent_key)
            if not agent:
                agent = TunnelingAgent(
                    reactor=reactor, proxy=proxy,
                    contextFactory=ScrapexClientContextFactory(),
                    connectTimeout=30, pool=self._pool)
                self._agents[agent_key] = agent
        else:
            # http
            agent_key = 'httpproxy-%s-%s' % (proxy.host, proxy.port)
            agent = self._agents.get(agent_key)
            if not agent:
                endpoint = TCP4ClientEndpoint(reactor, host=proxy.host,
                                              port=proxy.port,
                                              timeout=req.get('timeout'))
                agent = ProxyAgent(endpoint, pool=self._pool)
                self._agents[agent_key] = agent

        if proxy.auth_header:
            req.get('headers')['Proxy-Authorization'] = proxy.auth_header
    else:
        # Use a single shared agent when no proxies are used.
        agent = self._direct_agent

    redirectLimit = self.scraper.config.get('max_redirects')
    if redirectLimit is None:
        redirectLimit = 3
    if redirectLimit > 0:
        agent = BrowserLikeRedirectAgent(agent, redirectLimit=redirectLimit)

    agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])
    if self.cj is not None:
        agent = CookieAgent(agent, self.cj)
    return agent
def setUp(self):
    self.app = fixture(self)
    self.port = reactor.listenTCP(0, Site(self.app.resource()),
                                  interface="127.0.0.1")
    self.addCleanup(self.port.stopListening)
    portno = self.port.getHost().port
    self.agent = ProxyAgent(
        TCP4ClientEndpoint(reactor, "127.0.0.1", portno), reactor)
    super(RealTests, self).setUp()
def setUp(self):
    path = self.mktemp()
    self.app = fixture(self)
    self.port = reactor.listenUNIX(path, Site(self.app.resource()))
    self.addCleanup(self.port.stopListening)
    self.agent = ProxyAgent(UNIXClientEndpoint(reactor, path), reactor)
    super(RealTests, self).setUp()
def _get_agent(scheme, host, reactor, contextFactory=None):
    if scheme == b"https":
        proxy_endpoint = os.environ.get("https_proxy")
        if proxy_endpoint:
            proxy_url = urlparse.urlparse(proxy_endpoint)
            endpoint = TCP4ClientEndpoint(reactor, proxy_url.hostname,
                                          proxy_url.port)
            return ProxyAgent(endpoint)
        else:
            if contextFactory is None:
                contextFactory = WebVerifyingContextFactory(host)
            return Agent(reactor, contextFactory)
    else:
        proxy_endpoint = os.environ.get("http_proxy")
        if proxy_endpoint:
            proxy_url = urlparse.urlparse(proxy_endpoint)
            endpoint = TCP4ClientEndpoint(reactor, proxy_url.hostname,
                                          proxy_url.port)
            return ProxyAgent(endpoint)
        else:
            return Agent(reactor)
def _client(*args, **kwargs):
    reactor = default_reactor(kwargs.get('reactor'))
    pool = default_pool(reactor, kwargs.get('pool'), kwargs.get('persistent'))
    if 'proxy' in kwargs:
        address, port = kwargs.get('proxy')
        endpoint = TCP4ClientEndpoint(reactor, address, port)
        agent = ProxyAgent(endpoint)
    else:
        agent = Agent(reactor, pool=pool)
    return HTTPClient(agent)
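# A minimal usage sketch for the factory above, assuming HTTPClient exposes a
# treq-style get() method (the 127.0.0.1:8080 proxy address is purely
# illustrative):
client = _client(proxy=('127.0.0.1', 8080))
d = client.get('http://example.com/')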
def main():
    """Main loop."""
    reactor.callLater(0, consumeEvents)
    logging.basicConfig(level=logging.DEBUG)
    log.debug('Starting...')

    o = urlparse.urlparse(DOCKER_HOST)
    if o.scheme == 'unix':
        endpoint = UNIXClientEndpoint(reactor, o.path)
    elif o.scheme in ('tcp', 'http'):
        port = o.port or 80
        endpoint = TCP4ClientEndpoint(reactor, o.hostname, port)
    else:
        raise ValueError('Unsupported DOCKER_HOST scheme: %s' % o.scheme)

    agent = ProxyAgent(endpoint)
    d = agent.request('GET', '/events')
    d.addCallback(callback)
    d.addErrback(errback)
    return d
def _agent(self):
    if not self._proxyIp:
        agent = Agent(self._reactor, connectTimeout=self._timeout)
    else:
        endpoint = TCP4ClientEndpoint(reactor=self._reactor, host=self._ipAddr,
                                      port=self._port, timeout=self._timeout)
        agent = ProxyAgent(endpoint)
    agent = RedirectAgent(agent) if self._follow else agent
    return agent.request("GET", self._reqURL, self._headers)
def __init__(self, redirectLimit=5, connectTimeout=30, proxy=None):
    self.lastURI = None
    if proxy:
        try:
            endpoint = TCP4ClientEndpoint(reactor, proxy["host"], proxy["port"],
                                          timeout=connectTimeout)
        except (KeyError, TypeError):
            raise TypeError("ResolverAgent's proxy argument needs to be a dict "
                            "with the fields host and port")
        agent = ProxyAgent(endpoint)
    else:
        agent = Agent(reactor, connectTimeout=connectTimeout)
    RedirectAgent.__init__(self, agent, redirectLimit=redirectLimit)
def __init__(self, proxy_host="scorpion.premiumize.me", use_proxy=False,
             p_user='', p_pass=''):
    print "Twisted Agent in use", __TW_VER__
    # Cannot follow relative URL redirects (Location header).
    self.headers = Headers(agent_headers)
    self.useProxy = use_proxy and twEndpoints
    if self.useProxy:
        self.endpoint = TCP4ClientEndpoint(reactor, proxy_host, 80)
        self.agent = RedirectAgent(ProxyAgent(self.endpoint))
        auth = base64.b64encode("%s:%s" % (p_user, p_pass))
        self.headers.addRawHeader('Proxy-Authorization', 'Basic ' + auth.strip())
    else:
        self.agent = RedirectAgent(Agent(reactor))
def __init__(self, scraper, pool=None):
    self.scraper = scraper
    self._pool = pool
    self._agents = {}  # maps proxy -> agent

    redirectLimit = scraper.config.get('max_redirects')
    if redirectLimit is None:
        redirectLimit = 3

    # Create an agent for direct requests.
    self._direct_agent = Agent(reactor, pool=self._pool,
                               connectTimeout=scraper.config.get('timeout') or 30)
    if redirectLimit > 0:
        self._direct_agent = BrowserLikeRedirectAgent(
            self._direct_agent, redirectLimit=redirectLimit)
    self._direct_agent = ContentDecoderAgent(self._direct_agent,
                                             [('gzip', GzipDecoder)])
    self.cj = self.scraper.client.opener.cj
    if self.cj is not None:
        self._direct_agent = CookieAgent(self._direct_agent, self.cj)

    # Create an agent for http-proxy requests.
    # No endpoint yet; the name uses __ instead of _ to keep a reference to
    # the bare instance.
    self.__http_proxy_agent = ProxyAgent(None, pool=self._pool)
    if redirectLimit > 0:
        self._http_proxy_agent = BrowserLikeRedirectAgent(
            self.__http_proxy_agent, redirectLimit=redirectLimit)
        self._http_proxy_agent = ContentDecoderAgent(self._http_proxy_agent,
                                                     [('gzip', GzipDecoder)])
    else:
        self._http_proxy_agent = ContentDecoderAgent(self.__http_proxy_agent,
                                                     [('gzip', GzipDecoder)])
    if self.cj is not None:
        self._http_proxy_agent = CookieAgent(self._http_proxy_agent, self.cj)

    # Create an agent for https-proxy requests.
    # Again no endpoint yet, and __ keeps a reference to the bare instance.
    self.__https_proxy_agent = TunnelingAgent(
        reactor=reactor, proxy=None,
        contextFactory=ScrapexClientContextFactory(),
        connectTimeout=30, pool=self._pool)  # no proxy yet
    if redirectLimit > 0:
        self._https_proxy_agent = BrowserLikeRedirectAgent(
            self.__https_proxy_agent, redirectLimit=redirectLimit)
        self._https_proxy_agent = ContentDecoderAgent(self._https_proxy_agent,
                                                      [('gzip', GzipDecoder)])
    else:
        self._https_proxy_agent = ContentDecoderAgent(self.__https_proxy_agent,
                                                      [('gzip', GzipDecoder)])
    if self.cj is not None:
        self._https_proxy_agent = CookieAgent(self._https_proxy_agent, self.cj)
def _request(self, request, method):
    """
    Helper method that sends the given HTTP request.
    """
    # Copy the headers from the request.
    headers = Headers()
    for (key, value) in request.headers.iteritems():
        headers.addRawHeader(key, value)

    # If a username and password are given, add basic authentication.
    if (self.options.username is not None and
            self.options.password is not None):
        auth = "%s:%s" % (self.options.username, self.options.password)
        auth = auth.encode("base64").strip()
        headers.addRawHeader('Authorization', 'Basic ' + auth)

    # Determine whether the user has configured a proxy server.
    url_parts = urlparse.urlparse(request.url)
    proxy = self.options.proxy.get(url_parts.scheme, None)

    # Construct an agent to send the request.
    if proxy is not None:
        (hostname, port) = proxy.split(":")
        endpoint = TCP4ClientEndpoint(reactor, hostname, int(port),
                                      timeout=self.options.timeout)
        agent = ProxyAgent(endpoint)
    else:
        agent = Agent(reactor, self.contextFactory,
                      connectTimeout=self.options.timeout)

    url = request.url.encode("utf-8")
    producer = StringProducer(request.message or "")
    response = yield agent.request(method, url, headers, producer)

    # Construct a simple response consumer and give it the response body.
    consumer = StringResponseConsumer()
    response.deliverBody(consumer)
    yield consumer.getDeferred()
    consumer.response = response
    defer.returnValue(consumer)
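# StringResponseConsumer is used above but not defined in these snippets. A
# minimal sketch of a body-collecting protocol that supports the calls made
# above (getDeferred() and a writable .response attribute; the internals are
# an assumption) might be:
from twisted.internet import defer
from twisted.internet.protocol import Protocol

class StringResponseConsumer(Protocol):
    def __init__(self):
        self._buffer = []
        self._deferred = defer.Deferred()
        self.response = None

    def dataReceived(self, data):
        self._buffer.append(data)

    def connectionLost(self, reason):
        # Fires once the entire body has been delivered.
        self._deferred.callback(None)

    def getDeferred(self):
        return self._deferred

    def getBody(self):
        return b"".join(self._buffer)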
def setUp(self):
    # We use relpath as you can't bind to a path longer than 107 chars. You
    # can easily get an absolute path that long from mktemp, but rather
    # strangely bind doesn't care how long the abspath is, so we call relpath
    # here and it should work as long as our method names aren't too long.
    path = os.path.relpath(self.mktemp())
    self.app = fixture(self)
    self.port = reactor.listenUNIX(path, Site(self.app.resource()))
    self.addCleanup(self.port.stopListening)
    self.agent = ProxyAgent(UNIXClientEndpoint(reactor, path), reactor)
    super(RealTests, self).setUp()
def _get_agent(self, request):
    if request.proxy:
        _, _, proxy_host, proxy_port, proxy_params = _parse_url_args(request.proxy)
        scheme = _parse_url_args(request.url)[0]
        omit_connect_tunnel = proxy_params.find('noconnect') >= 0
        if scheme == 'https' and not omit_connect_tunnel:
            proxy_conf = (proxy_host, proxy_port,
                          request.headers.get('Proxy-Authorization', None))
            return TunnelingAgent(reactor, proxy_conf, self.context_factory,
                                  self.timeout, self.bind_address, self.pool)
        else:
            endpoint = TCP4ClientEndpoint(reactor, proxy_host, proxy_port,
                                          self.timeout, self.bind_address)
            return ProxyAgent(endpoint)
    return Agent(reactor, self.context_factory, self.timeout,
                 self.bind_address, self.pool)
def _agent(self):
    if not self._proxyIp:
        agent = Agent(self._reactor, contextFactory=NoVerifyContextFactory(),
                      connectTimeout=self._timeout)
    else:
        endpoint = TCP4ClientEndpoint(reactor=self._reactor, host=self._ipAddr,
                                      port=self._port, timeout=self._timeout)
        agent = ProxyAgent(endpoint)
    if self._follow in ('follow', 'sticky', 'stickyport'):
        agent = RedirectAgentZ(agent, onRedirect=self._follow,
                               port=self._port, proxy=self._proxyIp)
    return agent.request("GET", self._reqURL, self._headers)
def runClient(connect, rate):
    http_proxy = os.getenv('HTTP_PROXY')
    if http_proxy:
        http_proxy = furl(http_proxy)
        ep = HostnameEndpoint(reactor, http_proxy.host, http_proxy.port)
        ua = ProxyAgent(ep)
    else:
        ua = Agent(reactor)
    client = Client(connect, ua)
    looper = task.LoopingCall(client.request_GET)

    # Register a signal handler to stop the looping call.
    def signal_handler(signal, frame):
        looper.stop()
        reactor.runUntilCurrent()
        reactor.stop()
    signal.signal(signal.SIGINT, signal_handler)

    looper.start(1.0 / rate)  # float division avoids a zero interval on Python 2
def _getAgent(self, request, timeout, delay_time):
    proxy = request.meta.get('proxy')
    if proxy:
        scheme = _parsed(request.url)[0]
        proxyHost, proxyPort, _, _ = _parsed(proxy)
        creds = request.headers.getRawHeaders('Proxy-Authorization', None)
        creds_02 = creds[0].encode(self.auth_encoding) if isinstance(creds, list) else creds
        proxyPort = int(proxyPort) if isinstance(proxyPort, bytes) else proxyPort
        proxyHost = proxyHost.decode(self.auth_encoding)
        logger.warning(*self.lfm.crawled(
            "Spider", self.spider.name,
            'downloading via proxy <%s>, delayed by %6.3f, at:' % (proxy, delay_time),
            {'request': request, 'time': time.clock()}))
        logger.info(*self.lfm.crawled(
            'Spider', self.spider.name,
            'using proxy %s:%s' % (proxyHost, str(proxyPort)), request))
        if scheme == b'https':
            proxyConfig = (proxyHost, proxyPort, creds_02)
            request.headers.removeHeader('Proxy-Authorization')
            return TunnelingAgent(reactor, proxyConfig,
                                  contextFactory=self._contextFactoryProxy,
                                  connectTimeout=timeout,
                                  bindAddress=self._bindAddress,
                                  pool=self._pool)
        else:
            endpoint = TCP4ClientEndpoint(reactor, proxyHost, proxyPort)
            return ProxyAgent(endpoint=endpoint, pool=self._pool)
    else:
        return self._Agent(reactor, contextFactory=self._contextFactory,
                           connectTimeout=timeout,
                           bindAddress=self._bindAddress, pool=self._pool)
def get_page(self, url, *args, **kwds):
    """
    Define our own get_page method so that we can easily override the
    factory when we need to. This was copied from the following:

    * twisted.web.client.getPage
    * twisted.web.client._makeGetterFactory
    """
    contextFactory = None
    scheme, host, port, path = parse(url)
    data = kwds.get('postdata', None)
    self._method = method = kwds.get('method', 'GET')
    self.request_headers = self._headers(kwds.get('headers', {}))
    if (self.body_producer is None) and (data is not None):
        self.body_producer = FileBodyProducer(StringIO(data))

    if scheme == "https":
        proxy_endpoint = os.environ.get("https_proxy")
        if proxy_endpoint:
            proxy_url = urlparse.urlparse(proxy_endpoint)
            endpoint = TCP4ClientEndpoint(self.reactor, proxy_url.hostname,
                                          proxy_url.port)
            agent = ProxyAgent(endpoint)
        else:
            if self.endpoint.ssl_hostname_verification:
                contextFactory = WebVerifyingContextFactory(host)
            else:
                contextFactory = WebClientContextFactory()
            agent = Agent(self.reactor, contextFactory)
        self.client.url = url
        d = agent.request(method, url, self.request_headers, self.body_producer)
    else:
        proxy_endpoint = os.environ.get("http_proxy")
        if proxy_endpoint:
            proxy_url = urlparse.urlparse(proxy_endpoint)
            endpoint = TCP4ClientEndpoint(self.reactor, proxy_url.hostname,
                                          proxy_url.port)
            agent = ProxyAgent(endpoint)
        else:
            agent = Agent(self.reactor)
        d = agent.request(method, url, self.request_headers, self.body_producer)

    d.addCallback(self._handle_response)
    return d
def request_url(self, port, url):
    ouinet_client_endpoint = TCP4ClientEndpoint(reactor, "127.0.0.1", port)
    agent = ProxyAgent(ouinet_client_endpoint)
    return agent.request("GET", url)
from cyclone import escape
from cyclone.web import HTTPError
from twisted.internet import defer
from twisted.internet import reactor
from twisted.internet.protocol import Protocol
from twisted.internet.endpoints import TCP4ClientEndpoint
from twisted.web.client import Agent, ProxyAgent
from twisted.web.http_headers import Headers
from twisted.web.iweb import IBodyProducer
from zope.interface import implements

agent = Agent(reactor)
proxy_agent = ProxyAgent(None, reactor)


class StringProducer(object):
    implements(IBodyProducer)

    def __init__(self, body):
        self.body = body
        self.length = len(body)

    def startProducing(self, consumer):
        consumer.write(self.body)
        return defer.succeed(None)

    def pauseProducing(self):
        pass

    def stopProducing(self):
        # Required by IBodyProducer; nothing to clean up here.
        pass
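# A minimal usage sketch for StringProducer with the module-level agent
# defined above (the URL and payload are illustrative assumptions):
def _post_example():
    body = StringProducer(escape.json_encode({'ping': 'pong'}))
    d = agent.request('POST', 'http://example.com/api',
                      Headers({'Content-Type': ['application/json']}), body)
    d.addCallback(lambda response: response.code)
    return d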
def cbBody(body):
    body = body.decode('utf8')
    return body


pool = HTTPConnectionPool(reactor)
endpoint = TCP4ClientEndpoint(reactor, settings.PROXY_ADDRESS, settings.PROXY_PORT)
tunnelingAgent = TunnelingAgent(
    reactor, (settings.PROXY_ADDRESS, settings.PROXY_PORT, None),
    BrowserLikePolicyForHTTPS(), pool=pool)
proxyAgent = ProxyAgent(endpoint, pool=pool)
normalAgent = Agent(reactor, pool=pool)


def get(reactor, url, headers={}, body=None):
    # Pick the right agent: a CONNECT tunnel for https through the proxy,
    # a plain ProxyAgent for http through the proxy, a direct Agent otherwise.
    scheme = url.split(':')[0]
    if scheme == 'https' and settings.USE_PROXY:
        agent = tunnelingAgent
    elif settings.USE_PROXY:
        agent = proxyAgent
    else:
        agent = normalAgent
def json_call(self, method, **kwargs):
    https = kwargs.pop('https', False)
    blowfish = kwargs.pop('blowfish', True)
    url_args = {'method': method}
    if self.partner_id:
        url_args['partner_id'] = self.partner_id
    if self.user_id:
        url_args['user_id'] = self.user_id
    if self.user_auth_token or self.partner_auth_token:
        url_args['auth_token'] = (self.user_auth_token
                                  or self.partner_auth_token)

    protocol = 'https' if https else 'http'
    url = protocol + self.rpc_url + urllib.urlencode(url_args)

    if self.time_offset:
        kwargs['syncTime'] = int(time.time() + self.time_offset)
    if self.user_auth_token:
        kwargs['userAuthToken'] = self.user_auth_token
    elif self.partner_auth_token:
        kwargs['partnerAuthToken'] = self.partner_auth_token

    data = json.dumps(kwargs)
    if blowfish:
        data = self.pandora_encrypt(data)

    if self.proxy_host:
        endpoint = TCP4ClientEndpoint(reactor, self.proxy_host, self.proxy_port)
        # ProxyAgent takes (endpoint, reactor, pool); it does not accept a
        # context factory.
        agent = ProxyAgent(endpoint)
    else:
        agent = Agent(reactor, WebClientContextFactory())

    headers = Headers({'User-Agent': ['pithos'],
                       'Content-type': ['text/plain']})
    body = FileBodyProducer(StringIO(data))
    response = yield agent.request('POST', url, headers, body)
    body = yield readBody(response)

    tree = json.loads(body)
    if tree['stat'] == 'fail':
        code = tree['code']
        msg = tree['message']
        log.msg('fault code: {} message: {}'.format(code, msg))
        if code == API_ERROR.INVALID_AUTH_TOKEN:
            raise PandoraAuthTokenInvalid(msg)
        elif code == API_ERROR.COUNTRY_NOT_SUPPORTED:
            raise PandoraError(
                "Pandora not available", code,
                "Pandora is not available outside the US.")
        elif code == API_ERROR.API_VERSION_NOT_SUPPORTED:
            raise PandoraAPIVersionError(msg)
        elif code == API_ERROR.INSUFFICIENT_CONNECTIVITY:
            raise PandoraError(
                "Out of sync", code,
                "Correct your system's clock.")
        elif code == API_ERROR.READ_ONLY_MODE:
            raise PandoraError(
                "Pandora maintenance", code,
                "Pandora is in read-only mode as it is performing "
                "maintenance. Try again later.")
        elif code == API_ERROR.INVALID_LOGIN:
            raise PandoraError(
                "Login Error", code,
                "Invalid username or password.")
        elif code == API_ERROR.LISTENER_NOT_AUTHORIZED:
            raise PandoraError(
                "Pandora One Error", code,
                "A Pandora One account is required to access this feature.")
        elif code == API_ERROR.PARTNER_NOT_AUTHORIZED:
            raise PandoraError(
                "Login Error", code,
                "Invalid Pandora partner keys.")
        else:
            raise PandoraError(msg, code)

    if 'result' in tree:
        returnValue(tree['result'])
def __init__(self, address: str, port: int, destination: str, is_app_hosting: bool):
    self._address = address
    self._port = port
    self._destination = destination
    self._is_app_hosting = is_app_hosting
    self._zmq_factory = ZmqFactory()

    # If the ZMQ app is binding and hosting the server, we need to connect
    # to that instead.
    if is_app_hosting:
        zmq_socket_class = ZmqDealerConnection
        zmq_endpoint = ZmqEndpoint(ZmqEndpointType.connect,
                                   "tcp://%s:%d" % (address, port))
        LOG.info("Configured txZMQ for connecting to application "
                 "- connected to tcp://%s:%d" % (address, port))
    else:
        # Otherwise, bind to the address/port and have the app connect to us.
        zmq_socket_class = ZmqRouterConnection
        zmq_endpoint = ZmqEndpoint(ZmqEndpointType.bind,
                                   "tcp://%s:%d" % (address, port))
        LOG.info("Configured txZMQ for application connecting to us "
                 "- socket bound to tcp://%s:%d" % (address, port))
    self._zmq_socket = zmq_socket_class(self._zmq_factory, zmq_endpoint)

    # Store the socket identity of the client; we need it to send data back
    # to the local ZMQ app.
    self._zmq_socket_identity = None

    LOG.debug("Initializing socket and agent")
    # Check whether we want to use an HTTPS proxy; useful for Fiddler.
    if USE_HTTPS_PROXY:
        self._twisted_agent = ProxyAgent(
            HostnameEndpoint(reactor, PROXY_HOST, PROXY_PORT), reactor)
        LOG.warning("Agent is using HTTP proxy for outbound work!")
    else:
        # Otherwise, use the standard Agent with a nulled SSL verification
        # process, since self-signed certificates fail the connection
        # process entirely.
        self._twisted_agent = Agent(
            reactor, contextFactory=DisableSSLVerificationFactory())

    # Set up the auto-POST handler for our socket.
    def post_data(*zmq_data_recv):
        self._zmq_socket_identity = zmq_data_recv[0]
        data = zmq_data_recv[-1]
        LOG.debug("Received %d bytes of data" % len(data))

        # Hash and base64 the data for validation and transport.
        data_hash = hashlib.sha256(data).hexdigest()
        b64_data = base64.b64encode(data)

        # POST it to the remote server.
        request = self._twisted_agent.request(
            b'POST', (destination + "/zmq").encode(),
            Headers({
                'User-Agent': ['ZMQ-HTTP-Bridge-Agent'],
                'X-Verify-Hash': [data_hash]
            }),
            bodyProducer=StringProducer(b64_data))

        def handle_twisted_error(fail):
            # Print out _all_ errors, since Twisted doesn't provide all
            # exceptions individually.
            for error in fail.value.reasons:
                LOG.error("%s", str(error))

        request.addErrback(handle_twisted_error)
        request.addCallback(lambda ignored: LOG.debug("Request completed."))
        LOG.info("Forwarded data to destination (hash preview: %s)"
                 % data_hash[0:8])

    self._zmq_socket.gotMessage = post_data
def _test_proxy_alive(self, host, port, protocol, proxy_type,
                      url=b'http://www.baidu.com', timeout=10):
    endpoint = TCP4ClientEndpoint(reactor, host, int(port))
    agent = ProxyAgent(endpoint)
    d = agent.request(b'GET', url)
    self.currents += 1
    proxy = '{}:{}'.format(host, port)
    key = 'proxy_info:' + proxy

    if proxy_type == 'rookies_checking':
        def _callback(ignored):
            pipe = self.conn.pipeline(False)
            pipe.zrem('rookies_checking', proxy)
            pipe.hset(key, 'failed_times', 0)
            # Move the proxy from rookies to availables.
            pipe.smove('rookie_proxies', 'available_proxies',
                       '{}://{}'.format(protocol, proxy))
            pipe.zadd('availables_checking', proxy, time.time() + 30)
            pipe.execute()

        def _errback(err):
            if self.conn.hincrby(key, 'failed_times', 1) < 3:
                # Not yet at the maximum failed_times; rookies are not
                # critical, so re-check after 10 seconds.
                self.conn.zadd('rookies_checking', proxy, time.time() + 10)
            else:
                pipe = self.conn.pipeline(False)
                pipe.zrem('rookies_checking', proxy)
                pipe.smove('rookie_proxies', 'dead_proxies',
                           '{}://{}'.format(protocol, proxy))
                pipe.execute()
    else:
        def _callback(ignored):
            pipe = self.conn.pipeline(False)
            pipe.hset(key, 'failed_times', 0)
            pipe.zadd('availables_checking', proxy, time.time() + 30)
            pipe.smove('lost_proxies', 'available_proxies',
                       '{}://{}'.format(protocol, proxy))
            pipe.execute()

        def _errback(err):
            pipe = self.conn.pipeline(False)
            if self.conn.hincrby(key, 'failed_times', 1) < 3:
                pipe.zadd('availables_checking', proxy, time.time() + 10)
                pipe.smove('available_proxies', 'lost_proxies',
                           '{}://{}'.format(protocol, proxy))
            else:
                pipe.zrem('availables_checking', proxy)
                pipe.smove('lost_proxies', 'dead_proxies',
                           '{}://{}'.format(protocol, proxy))
                pipe.delete(key)
            pipe.execute()

    d.addCallbacks(_callback, _errback)
    reactor.callLater(timeout, d.cancel)

    def _clean(ignored):
        self.currents -= 1

    d.addBoth(_clean)
def redirect(response):
    print(pformat(list(response.headers.getAllRawHeaders())))
    print('Response code:', response.code)
    print('Response phrase:', response.phrase)
    # url = response.headers.getRawHeaders('Location')[0].encode('utf-8')
    # d = agent.request(b'GET', url, Headers({'Proxy-Authorization': [encode_user]}))
    # d.addCallback(cbRequest)
    # d.addCallback(get_onePage)
    d = readBody(response)
    d.addCallback(get_onePage)
    return d


host = "211.147.239.101"
port = 44370
user_name = base64.b64encode('spider:123456'.encode('utf-8')).strip()
encode_user = b'Basic ' + user_name
print(encode_user)

endpoint = TCP4ClientEndpoint(reactor, host, port)
agent = ProxyAgent(endpoint)
d = agent.request(b"GET", b"https://www.baidu.com")
d.addCallback(redirect)
d.addErrback(display)
d.addCallback(lambda ignored: reactor.stop())
reactor.run()
def redirect(response):
    print(pformat(list(response.headers.getAllRawHeaders())))
    print('Response code:', response.code)
    print('Response phrase:', response.phrase)
    # url = response.headers.getRawHeaders('Location')[0].encode('utf-8')
    # d = agent.request(b'GET', url, Headers({'Proxy-Authorization': [encode_user]}))
    # d.addCallback(cbRequest)
    # d.addCallback(get_onePage)
    d = readBody(response)
    d.addCallback(get_onePage)
    return d


host_01 = "149.28.192.96"
host = '47.105.165.81'
port = 5527
user_name = base64.b64encode('spider:123456'.encode('utf-8')).strip()
encode_user = b'Basic ' + user_name

endpoint = TCP4ClientEndpoint(reactor, host, port)
agent = ProxyAgent(endpoint)
d = agent.request(b"GET", b"http://www.zimuzu.tv/",
                  Headers({'Proxy-Authorization': [encode_user]}))
d.addCallback(redirect)
d.addErrback(display)
d.addCallback(lambda ignored: reactor.stop())
reactor.run()