Example 1
 def proxy_crawl(self, url, ip, reactor):
     endpoint = TCP4ClientEndpoint(reactor, ip.split(':')[0], int(ip.split(':')[1]),
                                   timeout=self.settings.get('DOWNLOAD_TIMEOUT', 10))
     agent = ProxyAgent(endpoint)
     response = agent.request(b"GET", url.encode('ascii'),
                              Headers(self.headers))
     return response
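A minimal usage sketch, not part of the original snippet: the Deferred returned by proxy_crawl fires with a Response object, so the body still has to be read separately. The spider instance and handle_body callback below are assumptions for illustration only.

from twisted.web.client import readBody

def handle_body(body):
    # body holds the raw response bytes fetched through the proxy
    return body

d = spider.proxy_crawl(url, ip, reactor)  # hypothetical spider object exposing proxy_crawl
d.addCallback(readBody)                   # Response -> Deferred that fires with the body bytes
d.addCallback(handle_body)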
Example 2
    def _asynchronous_check(self, item):

        logger.debug('checking...')

        from twisted.internet import reactor  # the reactor must be imported here, after it has been installed
        endpoint = HostnameEndpoint(reactor, item[0], int(item[1]))
        agent = ProxyAgent(endpoint)
        headers = {'User-Agent': ['Mozilla/5.0']}
        headers = Headers(headers)  # headers wrapper

        cd = agent.request(b'GET', b'https://www.baidu.com/',
                           headers=headers)
        cd.addTimeout(3, reactor)  # cancel the request if it has not completed within 3 seconds

        def check_code(response, **kwargs):
            if response.code < 300:
                logger.info('valid ip!')
                return kwargs.pop('item', None)
            else:
                raise Exception('invalid')

        def err(f):
            logger.debug(f)
            return f

        cd.addCallbacks(check_code, err, callbackKeywords={'item': item})
        # cd.addErrback(err)
        return cd
Example 3
def main():
    # TCP4ClientEndpoint needs the reactor plus the proxy's host and port (placeholder values here)
    endpoint = TCP4ClientEndpoint(reactor, "127.0.0.1", 8080)
    agent = ProxyAgent(endpoint)
    d = agent.request("GET", "http://echo.ioloop.io/a/b?format=json")
    d.addCallbacks(display, err)
    d.addCallback(lambda ignored: reactor.stop())
    reactor.run()
Example 4
 def get_device_from_smcloud(self, user_agent):
     """
     Queries ScientiaMobile's API and returns a dictionary of the device.
     """
     # Create basic auth string
     b64 = base64.encodestring(self.smcloud_api_key).strip()
     if self.http_proxy_host:
         headers = {
             "X-Cloud-Client": [self.SMCLOUD_CONFIG["client_version"]],
             "Authorization": ["Basic %s" % b64],
             "User-Agent": [str(user_agent)],
         }
         if self.http_proxy_username and self.http_proxy_password:
             auth = base64.encodestring("%s:%s" % (self.http_proxy_username, self.http_proxy_password)).strip()
             # Different proxy servers expect different authorization header names, so send all three variants
             headers["Proxy-Authorization"] = ["Basic %s" % auth]
             headers["Proxy-Authenticate"] = ["Basic %s" % auth]
             headers["Proxy-Authentication"] = ["Basic %s" % auth]
         endpoint = TCP4ClientEndpoint(reactor, self.http_proxy_host, self.http_proxy_port or 80, timeout=5)
         agent = ProxyAgent(endpoint)
         response = yield agent.request("GET", self.SMCLOUD_CONFIG["url"], headers=Headers(headers))
         if response.code != 200:
             raise ProxyConnectError()
         d = Deferred()
         response.deliverBody(SimpleReceiver(d))
         body = yield d
     else:
         headers = {"X-Cloud-Client": self.SMCLOUD_CONFIG["client_version"], "Authorization": "Basic %s" % b64}
         try:
             body = yield getPage(self.SMCLOUD_CONFIG["url"], headers=headers, agent=user_agent, timeout=5)
         except ConnectError, exc:
             raise ScientiaMobileCloudHandlerConnectError(exc)
Example 5
 def doRequest(noreason):
     proxyEndpoint = TCP4ClientEndpoint(reactor, '127.0.0.1', 8787)
     agent = ProxyAgent(proxyEndpoint, reactor)
     log.msg("Doing HTTP request via Lantern (127.0.0.1:8787) for %s" % self.url)
     request = agent.request("GET", self.url)
     request.addCallback(readBody)
     request.addCallback(addResultToReport)
     request.addCallback(self.processDirector.close)
     return request
Example 6
 def doRequest(noreason):
     proxyEndpoint = TCP4ClientEndpoint(reactor, '127.0.0.1', 8787)
     agent = ProxyAgent(proxyEndpoint, reactor)
     log.msg("Doing HTTP request via Lantern (127.0.0.1:8787) for %s" %
             self.url)
     request = agent.request("GET", self.url)
     request.addCallback(readBody)
     request.addCallback(addResultToReport)
     request.addCallback(self.processDirector.close)
     return request
Example 7
 def request_echo(self, content):
     """
     Send a GET request asking the test server to echo the given content
     """
     ouinet_client_endpoint = TCP4ClientEndpoint(
         reactor, "127.0.0.1", TestFixtures.FIRST_CLIENT["port"])
     agent = ProxyAgent(ouinet_client_endpoint)
     return agent.request(
         "GET", "http://127.0.0.1:" +
         str(TestFixtures.TEST_HTTP_SERVER_PORT) + "/?content=" + content)
Example 8
    def process(self):
        host = self.getHeader('Host')
        if not host:
            log.err("No host header given")
            self.setResponseCode(400)
            self.finish()
            return

        port = 80
        if self.isSecure():
            port = 443
        if ':' in host:
            host, port = host.split(':')
            port = int(port)

        log.msg("self: %s" % (self))
        log.msg("host:port: %s:%s" % (host, port))
        self.setHost(host, port)

        self.content.seek(0, 0)
        postData = self.content.read()
        endpoint = TCP4ClientEndpoint(self.reactor, os.environ.get("PROXY_HOST"), int(os.environ.get("PROXY_PORT")))
        agent = ProxyAgent(endpoint)
        scheme = b"http"
        if self.isSecure():
            scheme = b"https"
        userpw = None
        url = scheme + b"://" + str.encode(host)  + b":" + str.encode(str(port)) + self.uri
        if self.getUser() and self.getPassword():  # getUser()/getPassword() return b'' when no credentials were supplied
            userpw = self.getUser() + b":" + self.getPassword()
            url = scheme + b"://" + userpw + b"@" + str.encode(host)  + b":" + str.encode(str(port)) + self.uri
        log.msg("URL: %s" % (url))
        d = Deferred()
        log.msg("Method: %s" % (self.method))

        headers = {}
        for key, values in self.requestHeaders.getAllRawHeaders():
            if key.lower() == "connection":
                headers[key] = ['close']
            elif key.lower() == 'keep-alive':
                next
            else:
                headers[key] = values

        if self.method == b"POST" or self.method == b"PUT":
            log.msg("POST Data: %s" % (postData))
            body = BytesProducer(postData)
            d = agent.request(self.method, url, Headers(headers), body)
        else:
            d = agent.request(self.method, url, Headers(headers))
        d.addCallback(self.forwardToClient)
Example 9
    def _request(self, request, method):
        """
        Helper method that sends the given HTTP request.
        """
        # Copy the headers from the request.
        headers = Headers()
        for (key, value) in request.headers.iteritems():
            headers.addRawHeader(key, value)

        # If a username and password are given, then add basic authentication.
        if (self.options.username is not None and
            self.options.password is not None):
            auth = "%s:%s" % (self.options.username, self.options.password)
            auth = auth.encode("base64").strip()
            headers.addRawHeader('Authorization', 'Basic ' + auth)

        # Determine if the user has configured a proxy server.
        url_parts = urlparse.urlparse(request.url)
        proxy = self.options.proxy.get(url_parts.scheme, None)

        # Construct an agent to send the request.
        if proxy is not None:
            (hostname, port) = proxy.split(":")
            endpoint = TCP4ClientEndpoint(reactor, hostname, int(port),
                                          timeout = self.options.timeout)
            agent = ProxyAgent(endpoint)
        else:
            agent = Agent(reactor, self.httpsPolicy,
                          connectTimeout = self.options.timeout)

        url = request.url.encode("utf-8")
        producer = StringProducer(request.message or "")
        response = yield agent.request(method, url, headers, producer)

        # If the initial request returned a redirection response, attempt to follow it.
        http_redirect_codes = [http.MOVED_PERMANENTLY,  # 301
                               http.FOUND,              # 302
                               http.SEE_OTHER,          # 303
                               http.TEMPORARY_REDIRECT] # 307
        if response.code in http_redirect_codes and response.headers.hasHeader("Location"):
            new_url  = response.headers.getRawHeaders("Location")[0]
            producer = StringProducer(request.message or "")
            response = yield agent.request(method, new_url, headers, producer)

        # Construct a simple response consumer and give it the response body.
        consumer = StringResponseConsumer()
        response.deliverBody(consumer)
        yield consumer.getDeferred()
        consumer.response = response
        defer.returnValue(consumer)
Example 10
    def agent(self, scheme='http'):
        if not self._agents:
            pool = HTTPConnectionPool(reactor)
            pool.maxPersistentPerHost = 10
            pool.cachedConnectionTimeout = 15

            contextFactory = PermissiveBrowserLikePolicyForHTTPS()
            proxies = getproxies()

            if 'http' in proxies or 'https' in proxies:
                # I've noticed some intermittent failures (ResponseNeverReceived) to
                # POST request through a proxy when persistent connections are enabled.
                pool.persistent = False

            if 'https' in proxies:
                proxy = urlparse(proxies.get('https'))
                if proxy:
                    # Note- this isn't going to work completely.  It's not being
                    # passed the modified contextFactory, and in fact it doesn't
                    # even work properly for other reasons (ZPS-2061)
                    log.info("Creating https proxy (%s:%s)" %
                             (proxy.hostname, proxy.port))
                    endpoint = TCP4ClientEndpoint(reactor,
                                                  proxy.hostname,
                                                  proxy.port,
                                                  timeout=CONNECT_TIMEOUT)
                    SessionManager._agents['https'] = \
                        ProxyAgent(endpoint, reactor, pool=pool)
            else:
                SessionManager._agents['https'] = \
                    Agent(reactor, pool=pool, connectTimeout=CONNECT_TIMEOUT, contextFactory=contextFactory)

            if 'http' in proxies:
                proxy = urlparse(proxies.get('http'))
                if proxy:
                    log.info("Creating http proxy (%s:%s)" %
                             (proxy.hostname, proxy.port))
                    endpoint = TCP4ClientEndpoint(reactor,
                                                  proxy.hostname,
                                                  proxy.port,
                                                  timeout=CONNECT_TIMEOUT)
                    SessionManager._agents['http'] = \
                        ProxyAgent(endpoint, reactor, pool=pool)
            else:
                SessionManager._agents['http'] = \
                    Agent(reactor, pool=pool, connectTimeout=CONNECT_TIMEOUT)

        return SessionManager._agents[scheme]
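A brief usage sketch for the method above; the construction of SessionManager is not shown in the snippet, so the call below is hypothetical, and the target URL is a placeholder.

manager = SessionManager()
agent = manager.agent('https')  # returns the cached proxy-aware or direct agent for the scheme
d = agent.request(b'GET', b'https://example.com/')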
Example 11
def build_agent(req):
    uri = URI.fromBytes(req.url)
    proxy = req.get('proxy')
    if req.get('use_proxy') is False:
        proxy = None

    if proxy:
        if uri.scheme == 'https':

            agent = TunnelingAgent(
                reactor=reactor,
                proxy=proxy,
                contextFactory=ScrapexClientContextFactory(),
                connectTimeout=req.get('timeout'))
        else:
            endpoint = TCP4ClientEndpoint(reactor,
                                          host=proxy.host,
                                          port=proxy.port,
                                          timeout=req.get('timeout'))
            agent = ProxyAgent(endpoint)
            if proxy.auth_header:
                req.get('headers')['Proxy-Authorization'] = proxy.auth_header
    else:
        agent = Agent(reactor)

    agent = RedirectAgent(agent, redirectLimit=3)
    agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])
    return agent
Example 12
    def test_lantern_circumvent(self):
        proxyEndpoint = TCP4ClientEndpoint(reactor, '127.0.0.1', 8787)
        agent = ProxyAgent(proxyEndpoint, reactor)

        def addResultToReport(result):
            self.report['body'] = result
            self.report['success'] = True

        def addFailureToReport(failure):
            self.report['failure'] = handleAllFailures(failure)
            self.report['success'] = False

        def doRequest(noreason):
            log.debug(
                "Doing HTTP request via Lantern (127.0.0.1:8787) for %s" %
                self.url)
            request = agent.request("GET", self.url)
            request.addCallback(readBody)
            request.addCallback(addResultToReport)
            request.addCallback(self.processDirector.close)
            return request

        self.processDirector.bootstrapped.addCallback(doRequest)
        self.processDirector.bootstrapped.addErrback(addFailureToReport)
        self.runLantern()
        return self.d
Example 13
def request(method, url, headers, producer=None, use_tor=False):
    """Make a HTTP request and returns a deferred
    """

    if use_tor:
        host = urlparse(url).netloc
        port = config.Application().tor_socks_port
        proxy = TCP4ClientEndpoint(reactor, "localhost", int(port))
        agent = ProxyAgent(SOCKS5ClientEndpoint(host, 80, proxy))
    else:
        agent = Agent(reactor)

    if headers is None:
        headers = Headers({'User-Agent': ['Twisted Web Client']})

    log.msg('Using Tor network' if use_tor else 'Using standard network')
    log.msg('Request URL: {}'.format(url))
    return agent.request(method, url, headers, producer)
Example 14
	def _create_agent(self, req):

		""" create right agent for specific request """

		agent = None

		uri = URI.fromBytes(req.url)
		proxy = req.get('proxy')
		if req.get('use_proxy') is False:
			proxy = None
		
		if proxy:	
			if uri.scheme == 'https':
				
				agent_key = 'httpsproxy-%s-%s' % (proxy.host, proxy.port)
				agent = self._agents.get(agent_key)

				if not agent:
					
					agent = TunnelingAgent(reactor=reactor, proxy=proxy, contextFactory=ScrapexClientContextFactory(), connectTimeout=30, pool=self._pool)

					self._agents[agent_key] = agent

			else:
				#http
				agent_key = 'httpproxy-%s-%s' % (proxy.host, proxy.port)
				agent = self._agents.get(agent_key)

				if not agent:
					endpoint = TCP4ClientEndpoint(reactor, host=proxy.host, port=proxy.port , timeout=req.get('timeout'))
					agent = ProxyAgent(endpoint, pool=self._pool)
					self._agents[agent_key] = agent


				if proxy.auth_header:
					req.get('headers')['Proxy-Authorization'] = proxy.auth_header

		else:
			
			agent = self._direct_agent #use single agent when no proxies used


		redirectLimit = self.scraper.config.get('max_redirects')
		if redirectLimit is None:
			redirectLimit = 3
	
		if redirectLimit>0:
			agent = BrowserLikeRedirectAgent(agent, redirectLimit=redirectLimit)

		
		agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])

		if self.cj is not None:
			agent = CookieAgent(agent, self.cj)
		
		return agent	
Example 15
 def setUp(self):
     self.app = fixture(self)
     self.port = reactor.listenTCP(0, Site(self.app.resource()),
                                   interface="127.0.0.1")
     self.addCleanup(self.port.stopListening)
     portno = self.port.getHost().port
     self.agent = ProxyAgent(
         TCP4ClientEndpoint(reactor, "127.0.0.1", portno),
         reactor)
     super(RealTests, self).setUp()
Example 16
 def setUp(self):
     path = self.mktemp()
     self.app = fixture(self)
     self.port = reactor.listenUNIX(
         path,
         Site(self.app.resource()),
     )
     self.addCleanup(self.port.stopListening)
     self.agent = ProxyAgent(UNIXClientEndpoint(reactor, path), reactor)
     super(RealTests, self).setUp()
Example 17
def _get_agent(scheme, host, reactor, contextFactory=None):
    if scheme == b"https":
        proxy_endpoint = os.environ.get("https_proxy")
        if proxy_endpoint:
            proxy_url = urlparse.urlparse(proxy_endpoint)
            endpoint = TCP4ClientEndpoint(reactor, proxy_url.hostname, proxy_url.port)
            return ProxyAgent(endpoint)
        else:
            if contextFactory is None:
                contextFactory = WebVerifyingContextFactory(host)
            return Agent(reactor, contextFactory)
    else:
        proxy_endpoint = os.environ.get("http_proxy")
        if proxy_endpoint:
            proxy_url = urlparse.urlparse(proxy_endpoint)
            endpoint = TCP4ClientEndpoint(reactor, proxy_url.hostname, proxy_url.port)
            return ProxyAgent(endpoint)
        else:
            return Agent(reactor)
Example 18
def _client(*args, **kwargs):
    reactor = default_reactor(kwargs.get('reactor'))
    pool = default_pool(reactor, kwargs.get('pool'), kwargs.get('persistent'))
    if 'proxy' in kwargs:
        address, port = kwargs.get('proxy')
        endpoint = TCP4ClientEndpoint(reactor, address, port)
        agent = ProxyAgent(endpoint)
    else:
        agent = Agent(reactor, pool=pool)
    return HTTPClient(agent)
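A short usage sketch for the helper above; the proxy address and target URL are placeholders, and it assumes HTTPClient exposes a treq-style get() method as the surrounding code suggests.

# route requests through a local HTTP proxy instead of connecting directly
client = _client(proxy=('127.0.0.1', 8080))
d = client.get('http://example.com/')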
Example 19
def main():
    """Main loop
    """
    reactor.callLater(0, consumeEvents)
    logging.basicConfig(level=logging.DEBUG)
    log.debug('Starting...')
    o = urlparse.urlparse(DOCKER_HOST)
    if o.scheme == 'unix':
        endpoint = UNIXClientEndpoint(reactor, o.path)
    elif o.scheme in ('tcp', 'http'):
        port = o.port or 80
        endpoint = TCP4ClientEndpoint(reactor, o.hostname, port)
    else:
        raise ValueError("unsupported DOCKER_HOST scheme: %r" % o.scheme)
    agent = ProxyAgent(endpoint)
    d = agent.request('GET', '/events')
    d.addCallback(callback)
    d.addErrback(errback)
    return d
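The callback and errback referenced above are not included in the snippet; a minimal sketch of what they might look like (names and behaviour are assumptions, and readBody only fires once the daemon closes the /events stream).

from twisted.web.client import readBody

def callback(response):
    # for a long-lived event stream, passing a Protocol to response.deliverBody
    # would be more appropriate than buffering the whole body
    return readBody(response).addCallback(lambda body: log.debug(body))

def errback(failure):
    log.error(failure)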
Example 20
 def _agent(self):
     if not self._proxyIp:
         agent = Agent(self._reactor, connectTimeout=self._timeout)
     else:
         endpoint = TCP4ClientEndpoint(reactor=self._reactor,
                                       host=self._ipAddr,
                                       port=self._port,
                                       timeout=self._timeout)
         agent = ProxyAgent(endpoint)
     agent = RedirectAgent(agent) if self._follow else agent
     return agent.request("GET", self._reqURL, self._headers)
Example 21
 def __init__(self, redirectLimit=5, connectTimeout=30, proxy=None):
     self.lastURI = None
     if proxy:
         try:
             endpoint = TCP4ClientEndpoint(reactor, proxy["host"], proxy["port"], timeout=connectTimeout)
         except (TypeError, KeyError):
             raise TypeError("ResolverAgent's proxy argument needs to be a dict with 'host' and 'port' fields")
         agent = ProxyAgent(endpoint)
     else:
         agent = Agent(reactor, connectTimeout=connectTimeout)
     RedirectAgent.__init__(self, agent, redirectLimit=redirectLimit)
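A hypothetical construction sketch for the agent above; the proxy host and port are placeholder values.

resolver_agent = ResolverAgent(redirectLimit=5, connectTimeout=30,
                               proxy={"host": "127.0.0.1", "port": 8080})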
Example 22
	def __init__(self, proxy_host="scorpion.premiumize.me", use_proxy=False, p_user='', p_pass=''):
		print "Twisted Agent in use", __TW_VER__
		# can not follow rel. url redirects (location header)
		self.headers = Headers(agent_headers)
		self.useProxy = use_proxy and twEndpoints
		if self.useProxy:
			self.endpoint = TCP4ClientEndpoint(reactor, proxy_host, 80)
			self.agent = RedirectAgent(ProxyAgent(self.endpoint))
			auth = base64.b64encode("%s:%s" % (p_user, p_pass))
			self.headers.addRawHeader('Proxy-Authorization', 'Basic ' + auth.strip())
		else:
			self.agent = RedirectAgent(Agent(reactor))
Example 23
	def __init__(self, scraper, pool=None):
		self.scraper = scraper
		self._pool = pool

		self._agents = {} #map proxy->an agent


		redirectLimit = scraper.config.get('max_redirects')
		if redirectLimit is None:
			redirectLimit = 3

		#create an agent for direct requests
		self._direct_agent = Agent(reactor, pool=self._pool, connectTimeout=scraper.config.get('timeout') or 30)
		if redirectLimit>0:
			self._direct_agent = BrowserLikeRedirectAgent(self._direct_agent, redirectLimit=redirectLimit)
		
		self._direct_agent = ContentDecoderAgent(self._direct_agent, [('gzip', GzipDecoder)])
		self.cj = self.scraper.client.opener.cj
		

		if self.cj is not None:
			
			self._direct_agent = CookieAgent(self._direct_agent, self.cj)

		#create an agent for http-proxy requests
		#no endpoint yet, use __ instead of _ to backup the instance
		self.__http_proxy_agent = ProxyAgent(None, pool=self._pool) 

		if redirectLimit>0:
			self._http_proxy_agent = BrowserLikeRedirectAgent(self.__http_proxy_agent, redirectLimit=redirectLimit)

			self._http_proxy_agent = ContentDecoderAgent(self._http_proxy_agent, [('gzip', GzipDecoder)])
		else:

			self._http_proxy_agent = ContentDecoderAgent(self.__http_proxy_agent, [('gzip', GzipDecoder)])
			

		if self.cj is not None:
			self._http_proxy_agent = CookieAgent(self._http_proxy_agent, self.cj)

		#create an agent for https-proxy requests
		#no endpoint yet, use __ instead of _ to backup the instance
		self.__https_proxy_agent = TunnelingAgent(reactor=reactor, proxy=None, contextFactory=ScrapexClientContextFactory(), connectTimeout=30, pool=self._pool) #no proxy yet
		if redirectLimit>0:
			self._https_proxy_agent = BrowserLikeRedirectAgent(self.__https_proxy_agent, redirectLimit=redirectLimit)

			self._https_proxy_agent = ContentDecoderAgent(self._https_proxy_agent, [('gzip', GzipDecoder)])
		else:
			self._https_proxy_agent = ContentDecoderAgent(self.__https_proxy_agent, [('gzip', GzipDecoder)])

			
		if self.cj is not None:
			self._https_proxy_agent = CookieAgent(self._https_proxy_agent, self.cj)
Example 24
    def _request(self, request, method):
        """
        Helper method that sends the given HTTP request.
        """
        # Copy the headers from the request.
        headers = Headers()
        for (key, value) in request.headers.iteritems():
            headers.addRawHeader(key, value)

        # If a username and password are given, then add basic authentication.
        if (self.options.username is not None and
            self.options.password is not None):
            auth = "%s:%s" % (self.options.username, self.options.password)
            auth = auth.encode("base64").strip()
            headers.addRawHeader('Authorization', 'Basic ' + auth)

        # Determine if the user has configured a proxy server.
        url_parts = urlparse.urlparse(request.url)
        proxy = self.options.proxy.get(url_parts.scheme, None)

        # Construct an agent to send the request.
        if proxy is not None:
            (hostname, port) = proxy.split(":")
            endpoint = TCP4ClientEndpoint(reactor, hostname, int(port),
                                          timeout = self.options.timeout)
            agent = ProxyAgent(endpoint)
        else:
            agent = Agent(reactor, self.contextFactory,
                          connectTimeout = self.options.timeout)

        url = request.url.encode("utf-8")
        producer = StringProducer(request.message or "")
        response = yield agent.request(method, url, headers, producer)

        # Construct a simple response consumer and give it the response body.
        consumer = StringResponseConsumer()
        response.deliverBody(consumer)
        yield consumer.getDeferred()
        consumer.response = response
        defer.returnValue(consumer)
Example 25
 def setUp(self):
     # We use relpath as you can't bind to a path longer than 107
     # chars. You can easily get an absolute path that long
     # from mktemp, but rather strangely bind doesn't care
     # how long the abspath is, so we call relpath here and
     # it should work as long as our method names aren't too long
     path = os.path.relpath(self.mktemp())
     self.app = fixture(self)
     self.port = reactor.listenUNIX(
         path,
         Site(self.app.resource()),
     )
     self.addCleanup(self.port.stopListening)
     self.agent = ProxyAgent(UNIXClientEndpoint(reactor, path), reactor)
     super(RealTests, self).setUp()
Example 26
 def _get_agent(self, request):
     if request.proxy:
         _, _, proxy_host, proxy_port, proxy_params = _parse_url_args(request.proxy)
         scheme = _parse_url_args(request.url)[0]
         omit_connect_tunnel = proxy_params.find('noconnect') >= 0
         if scheme == 'https' and not omit_connect_tunnel:
             proxy_conf = (proxy_host, proxy_port,
                           request.headers.get('Proxy-Authorization', None))
             return TunnelingAgent(reactor, proxy_conf, self.context_factory,
                 self.timeout, self.bind_address, self.pool)
         else:
             endpoint = TCP4ClientEndpoint(reactor, proxy_host, proxy_port,
                 self.timeout, self.bind_address)
             return ProxyAgent(endpoint)
     return Agent(reactor, self.context_factory, self.timeout, self.bind_address, self.pool)
Example 27
 def _agent(self):
     if not self._proxyIp:
         agent = Agent(self._reactor,
                       contextFactory=NoVerifyContextFactory(),
                       connectTimeout=self._timeout)
     else:
         endpoint = TCP4ClientEndpoint(reactor=self._reactor,
                                       host=self._ipAddr,
                                       port=self._port,
                                       timeout=self._timeout)
         agent = ProxyAgent(endpoint)
     if self._follow in ('follow', 'sticky', 'stickyport'):
         agent = RedirectAgentZ(agent,
                                onRedirect=self._follow,
                                port=self._port,
                                proxy=self._proxyIp)
     return agent.request("GET", self._reqURL, self._headers)
Example 28
def runClient(connect, rate):
    http_proxy = os.getenv('HTTP_PROXY')
    if http_proxy:
        http_proxy = furl(http_proxy)
        ep = HostnameEndpoint(reactor, http_proxy.host, http_proxy.port)
        ua = ProxyAgent(ep)
    else:
        ua = Agent(reactor)
    client = Client(connect, ua)
    looper = task.LoopingCall(client.request_GET)

    # register signal handler to stop the looping call
    def signal_handler(signal, frame):
        looper.stop()
        reactor.runUntilCurrent()
        reactor.stop()

    signal.signal(signal.SIGINT, signal_handler)

    looper.start(1 / rate)
Example 29
    def _getAgent(self, request, timeout, delay_time):
        proxy = request.meta.get('proxy')

        if proxy:
            scheme = _parsed(request.url)[0]
            proxyHost, proxyPort, _, _ = _parsed(proxy)
            creds = request.headers.getRawHeaders('Proxy-Authorization', None)
            creds_02 = creds[0].encode(self.auth_encoding) if isinstance(
                creds, list) else creds
            proxyPort = int(proxyPort) if isinstance(proxyPort,
                                                     bytes) else proxyPort
            proxyHost = proxyHost.decode(self.auth_encoding)

            logger.warning(*self.lfm.crawled(
                "Spider", self.spider.name, '正在通过代理<%s>下载,延迟了%6.3f,时间为:' %
                (proxy, delay_time), {
                    'request': request,
                    'time': time.clock()
                }))
            logger.info(
                *self.lfm.crawled('Spider', self.spider.name, 'using proxy %s:%s' %
                                  (proxyHost, str(proxyPort)), request))
            if scheme == b'https':
                proxyConfig = (proxyHost, proxyPort, creds_02)
                request.headers.removeHeader('Proxy-Authorization')
                return TunnelingAgent(reactor,
                                      proxyConfig,
                                      contextFactory=self._contextFactoryProxy,
                                      connectTimeout=timeout,
                                      bindAddress=self._bindAddress,
                                      pool=self._pool)
            else:
                endpoint = TCP4ClientEndpoint(reactor, proxyHost, proxyPort)
                return ProxyAgent(endpoint=endpoint, pool=self._pool)

        else:
            return self._Agent(reactor,
                               contextFactory=self._contextFactory,
                               connectTimeout=timeout,
                               bindAddress=self._bindAddress,
                               pool=self._pool)
Example 30
 def get_page(self, url, *args, **kwds):
     """
     Define our own get_page method so that we can easily override the
     factory when we need to. This was copied from the following:
         * twisted.web.client.getPage
         * twisted.web.client._makeGetterFactory
     """
     contextFactory = None
     scheme, host, port, path = parse(url)
     data = kwds.get('postdata', None)
     self._method = method = kwds.get('method', 'GET')
     self.request_headers = self._headers(kwds.get('headers', {}))
     if (self.body_producer is None) and (data is not None):
         self.body_producer = FileBodyProducer(StringIO(data))
     if scheme == "https":
         proxy_endpoint = os.environ.get("https_proxy")
         if proxy_endpoint:
             proxy_url = urlparse.urlparse(proxy_endpoint)
             endpoint = TCP4ClientEndpoint(self.reactor, proxy_url.hostname, proxy_url.port)
             agent = ProxyAgent(endpoint)
         else:
             if self.endpoint.ssl_hostname_verification:
                 contextFactory = WebVerifyingContextFactory(host)
             else:
                 contextFactory = WebClientContextFactory()
             agent = Agent(self.reactor, contextFactory)
         self.client.url = url
         d = agent.request(method, url, self.request_headers,
             self.body_producer)
     else:
         proxy_endpoint = os.environ.get("http_proxy")
         if proxy_endpoint:
             proxy_url = urlparse.urlparse(proxy_endpoint)
             endpoint = TCP4ClientEndpoint(self.reactor, proxy_url.hostname, proxy_url.port)
             agent = ProxyAgent(endpoint)
         else:
             agent = Agent(self.reactor)
         d = agent.request(method, url, self.request_headers,
             self.body_producer)
     d.addCallback(self._handle_response)
     return d
Example 31
 def request_url(self, port, url):
     ouinet_client_endpoint = TCP4ClientEndpoint(reactor, "127.0.0.1", port)
     agent = ProxyAgent(ouinet_client_endpoint)
     return agent.request("GET", url)
Example 32
from cyclone import escape
from cyclone.web import HTTPError

from twisted.internet import defer
from twisted.internet import reactor
from twisted.internet.protocol import Protocol
from twisted.internet.endpoints import TCP4ClientEndpoint

from twisted.web.client import Agent, ProxyAgent
from twisted.web.http_headers import Headers
from twisted.web.iweb import IBodyProducer

from zope.interface import implements

agent = Agent(reactor)
proxy_agent = ProxyAgent(None, reactor)


class StringProducer(object):
    implements(IBodyProducer)

    def __init__(self, body):
        self.body = body
        self.length = len(body)

    def startProducing(self, consumer):
        consumer.write(self.body)
        return defer.succeed(None)

    def pauseProducing(self):
        pass

    def stopProducing(self):
        # required by IBodyProducer; nothing to clean up for an in-memory body
        pass
Example 33

def cbBody(body):
    # print('Response body:')
    body = body.decode('utf8')
    return body


pool = HTTPConnectionPool(reactor)
endpoint = TCP4ClientEndpoint(reactor, settings.PROXY_ADDRESS,
                              settings.PROXY_PORT)
tunnelingAgent = TunnelingAgent(
    reactor, (settings.PROXY_ADDRESS, settings.PROXY_PORT, None),
    BrowserLikePolicyForHTTPS(),
    pool=pool)
proxyAgent = ProxyAgent(endpoint, pool=pool)
normalAgent = Agent(reactor, pool=pool)
# pool = None


def get(reactor, url, headers={}, body=None):

    ssl = url.split(':')[0]

    if ssl == 'https' and settings.USE_PROXY:
        agent = tunnelingAgent
    else:
        if settings.USE_PROXY:
            agent = proxyAgent
        else:
            agent = normalAgent
Example 34
    def json_call(self, method, **kwargs):
        https = kwargs.pop('https', False)
        blowfish = kwargs.pop('blowfish', True)

        url_args = {'method': method}
        if self.partner_id:
            url_args['partner_id'] = self.partner_id
        if self.user_id:
            url_args['user_id'] = self.user_id
        if self.user_auth_token or self.partner_auth_token:
            url_args['auth_token'] = (self.user_auth_token or
                                      self.partner_auth_token)

        protocol = 'https' if https else 'http'
        url = protocol + self.rpc_url + urllib.urlencode(url_args)

        if self.time_offset:
            kwargs['syncTime'] = int(time.time() + self.time_offset)
        if self.user_auth_token:
            kwargs['userAuthToken'] = self.user_auth_token
        elif self.partner_auth_token:
            kwargs['partnerAuthToken'] = self.partner_auth_token

        data = json.dumps(kwargs)

        if blowfish:
            data = self.pandora_encrypt(data)

        if self.proxy_host:
            endpoint = TCP4ClientEndpoint(reactor, self.proxy_host,
                                          self.proxy_port)
            agent = ProxyAgent(endpoint)  # ProxyAgent takes the proxy endpoint (plus optional reactor/pool), not a context factory
        else:
            agent = Agent(reactor, WebClientContextFactory())

        headers = Headers({'User-Agent': ['pithos'],
                           'Content-type': ['text/plain']})
        body = FileBodyProducer(StringIO(data))

        response = yield agent.request('POST', url, headers, body)
        body = yield readBody(response)
        tree = json.loads(body)
        if tree['stat'] == 'fail':
            code = tree['code']
            msg = tree['message']
            log.msg('fault code: {} message: {}'.format(code, msg))

            if code == API_ERROR.INVALID_AUTH_TOKEN:
                raise PandoraAuthTokenInvalid(msg)
            elif code == API_ERROR.COUNTRY_NOT_SUPPORTED:
                raise PandoraError(
                    "Pandora not available", code,
                    "Pandora is not available outside the US.")
            elif code == API_ERROR.API_VERSION_NOT_SUPPORTED:
                raise PandoraAPIVersionError(msg)
            elif code == API_ERROR.INSUFFICIENT_CONNECTIVITY:
                raise PandoraError(
                    "Out of sync", code, "Correct your system's clock.")
            elif code == API_ERROR.READ_ONLY_MODE:
                raise PandoraError(
                    "Pandora maintenance", code,
                    "Pandora is in read-only mode as it is performing "
                    "maintenance. Try again later.")
            elif code == API_ERROR.INVALID_LOGIN:
                raise PandoraError(
                    "Login Error", code, "Invalid username or password.")
            elif code == API_ERROR.LISTENER_NOT_AUTHORIZED:
                raise PandoraError(
                    "Pandora One Error", code,
                    "A Pandora One account is required to access this "
                    "feature.")
            elif code == API_ERROR.PARTNER_NOT_AUTHORIZED:
                raise PandoraError(
                    "Login Error", code, "Invalid Pandora partner keys.")
            else:
                raise PandoraError(msg, code)

        if 'result' in tree:
            returnValue(tree['result'])
Example 35
    def __init__(self, address: str, port: int, destination: str,
                 is_app_hosting: bool):
        self._address = address
        self._port = port
        self._destination = destination
        self._is_app_hosting = is_app_hosting
        self._zmq_factory = ZmqFactory()

        # if the ZMQ app is binding and hosting the server, we need to connect to that instead
        if is_app_hosting:
            zmq_socket_class = ZmqDealerConnection
            zmq_endpoint = ZmqEndpoint(ZmqEndpointType.connect,
                                       "tcp://%s:%d" % (address, port))
            LOG.info("Configured txZMQ for connecting to application "
                     "- connected to tcp://%s:%d" % (address, port))
        else:
            # otherwise, bind to the address/port and have them connect to us
            zmq_socket_class = ZmqRouterConnection
            zmq_endpoint = ZmqEndpoint(ZmqEndpointType.bind,
                                       "tcp://%s:%d" % (address, port))
            LOG.info("Configured txZMQ for application connecting to us "
                     "- socket bound to tcp://%s:%d" % (address, port))

        self._zmq_socket = zmq_socket_class(self._zmq_factory, zmq_endpoint)
        # store the socket identity of the client; we need it to send data back to the local ZMQ app
        self._zmq_socket_identity = None

        LOG.debug("Initializing socket and agent")
        # check if we want to use an HTTPS proxy; useful for Fiddler
        if USE_HTTPS_PROXY:
            self._twisted_agent = ProxyAgent(
                HostnameEndpoint(reactor, PROXY_HOST, PROXY_PORT), reactor)
            LOG.warning("Agent is using HTTP proxy for outbound work!")
        else:
            # otherwise, use the standard Agent with a nulled SSL verification process, since self-signed certificates
            # fail the connection process entirely
            self._twisted_agent = Agent(
                reactor, contextFactory=DisableSSLVerificationFactory())

        # setup auto-POST method for our socket
        def post_data(*zmq_data_recv):
            self._zmq_socket_identity = zmq_data_recv[0]
            data = zmq_data_recv[-1]
            LOG.debug("Received %d bytes of data" % len(data))
            # hash and base64 our data for validation and transportation
            data_hash = hashlib.sha256(data).hexdigest()
            b64_data = base64.b64encode(data)
            # POST it to the remote server
            request = self._twisted_agent.request(
                b'POST', (destination + "/zmq").encode(),
                Headers({
                    'User-Agent': ['ZMQ-HTTP-Bridge-Agent'],
                    'X-Verify-Hash': [data_hash]
                }),
                bodyProducer=StringProducer(b64_data))

            def handle_twisted_error(fail):
                # print out _all_ errors, since Twisted doesn't provide all exceptions
                for error in fail.value.reasons:
                    LOG.error("%s", str(error))

            request.addErrback(handle_twisted_error)
            request.addCallback(
                lambda ignored: LOG.debug("Request completed."))
            LOG.info("Forwarded data to destination (hash preview: %s)" %
                     data_hash[0:8])

        self._zmq_socket.gotMessage = post_data
Example 36
    def _test_proxy_alive(self,
                          host,
                          port,
                          protocol,
                          proxy_type,
                          url=b'http://www.baidu.com',
                          timeout=10):
        endpoint = TCP4ClientEndpoint(reactor, host, int(port))
        agent = ProxyAgent(endpoint)
        d = agent.request(b'GET', url)
        self.currents += 1
        proxy = '{}:{}'.format(host, port)
        key = 'proxy_info:' + proxy

        if proxy_type == 'rookies_checking':

            def _callback(ignored):
                pipe = self.conn.pipeline(False)
                pipe.zrem('rookies_checking', proxy)
                pipe.hset(key, 'failed_times', 0)
                # Move proxy from rookies to availables
                pipe.smove('rookie_proxies', 'available_proxies',
                           '{}://{}'.format(protocol, proxy))
                pipe.zadd('availables_checking', proxy, time.time() + 30)
                pipe.execute()

            def _errback(err):
                if self.conn.hincrby(key, 'failed_times', 1) < 3:
                    # If not reach the maximum of failed_times
                    # Since it is not important so re-check it after 10 seconds
                    self.conn.zadd('rookies_checking', proxy, time.time() + 10)
                else:
                    pipe = self.conn.pipeline(False)
                    pipe.zrem('rookies_checking', proxy)
                    pipe.smove('rookie_proxies', 'dead_proxies',
                               '{}://{}'.format(protocol, proxy))
                    pipe.execute()
        else:

            def _callback(ignored):
                pipe = self.conn.pipeline(False)
                pipe.hset(key, 'failed_times', 0)
                pipe.zadd('availables_checking', proxy, time.time() + 30)
                pipe.smove('lost_proxies', 'available_proxies',
                           '{}://{}'.format(protocol, proxy))
                pipe.execute()

            def _errback(err):
                pipe = self.conn.pipeline(False)
                if self.conn.hincrby(key, 'failed_times', 1) < 3:
                    pipe.zadd('availables_checking', proxy, time.time() + 10)
                    pipe.smove('available_proxies', 'lost_proxies',
                               '{}://{}'.format(protocol, proxy))
                else:
                    pipe.zrem('availables_checking', proxy)
                    pipe.smove('lost_proxies', 'dead_proxies',
                               '{}://{}'.format(protocol, proxy))
                    pipe.delete(key)
                pipe.execute()

        d.addCallbacks(_callback, _errback)
        reactor.callLater(timeout, d.cancel)

        def _clean(ignored):
            self.currents -= 1

        d.addBoth(_clean)
Example 37
    print(pformat(list(response.headers.getAllRawHeaders())))
    print('Response code:', response.code)
    print('Response phrase:', response.phrase)
    # url = response.headers.getRawHeaders('Location')[0].encode('utf-8')
    #
    # d = agent.request(b'GET',url, Headers({'Proxy-Authorization': [encode_user]}))
    # d.addCallback(cbRequest)
    # d.addCallback(get_onePage)

    # d = readBody(response)
    # d.addCallback(get_onePage)


    return d

host= "211.147.239.101"
port = 44370

user_name = base64.b64encode('spider:123456'.encode('utf-8')).strip()
encode_user = b'Basic '+user_name
print(encode_user)
endpoint = TCP4ClientEndpoint(reactor, host, port)

agent = ProxyAgent(endpoint)
d = agent.request(b"GET", b"https://www.baidu.com")
d.addCallback(redirect)
d.addErrback(display)
d.addCallback(lambda ignored: reactor.stop())
reactor.run()

Example 38
    print(pformat(list(response.headers.getAllRawHeaders())))
    print('Response code:', response.code)
    print('Response phrase:', response.phrase)
    # url = response.headers.getRawHeaders('Location')[0].encode('utf-8')
    #
    # d = agent.request(b'GET',url, Headers({'Proxy-Authorization': [encode_user]}))
    # d.addCallback(cbRequest)
    # d.addCallback(get_onePage)

    d = readBody(response)
    d.addCallback(get_onePage)

    return d


host_01 = "149.28.192.96"
host = '47.105.165.81'
port = 5527
# header = Headers()
user_name = base64.b64encode('spider:123456'.encode('utf-8')).strip()
encode_user = b'Basic ' + user_name
endpoint = TCP4ClientEndpoint(reactor, host, port)

agent = ProxyAgent(endpoint)
d = agent.request(b"GET", b"http://www.zimuzu.tv/",
                  Headers({'Proxy-Authorization': [encode_user]}))
d.addCallback(redirect)
d.addErrback(display)
d.addCallback(lambda ignored: reactor.stop())
reactor.run()