Ejemplo n.º 1
0
class PhantomJSMiddleware(object):
	"""Middleware that answers flagged requests with PhantomJS-fetched content.

	Requests whose ``meta`` contains the key ``phantom`` are fetched through
	``PhantomJSService`` and answered directly with an ``HtmlResponse``.
	Requests without that key are not handled (``process_request`` returns
	``None``).

	NOTE(review): the ``process_request(request, spider)`` signature looks
	like a Scrapy downloader middleware hook -- confirm against the project
	settings.
	"""

	# Empty-document markup that is treated as a failed fetch.
	_EMPTY_PAGE = '<html><head></head><body></body></html>'

	def __init__(self):
		"""Obtain the HTTP proxy factory and create the PhantomJS service."""
		self.proxyFactory = HttpProxyFactory.getHttpProxyFactory()
		self.phantomJSService = PhantomJSService()

	def process_request(self, request, spider):
		"""Fetch ``request.url`` through PhantomJS when the request asks for it.

		Args:
			request: Object exposing ``url`` and a dict-like ``meta``. The
				``phantom`` key in ``meta`` enables PhantomJS handling; an
				optional ``proxy`` key names the proxy to fetch through.
			spider: Unused; accepted to keep the middleware hook signature.

		Returns:
			``None`` when ``meta`` has no ``phantom`` key. Otherwise an
			``HtmlResponse`` with status 200 and the fetched body, or with
			status 503 and an empty body when the fetch produced no content.
		"""
		# ``in`` replaces dict.has_key(), which no longer exists on Python 3.
		if 'phantom' not in request.meta:
			return None

		logging.info('[PID:%s] PhantomJS Requesting: %s', os.getpid(), request.url)
		if 'proxy' in request.meta:
			# Drop the first "http://" so only the remainder is handed to the service.
			proxy = request.meta['proxy'].replace("http://", "", 1)
			content = self.phantomJSService.requestWithProxy(request.url, proxy)
		else:
			content = self.phantomJSService.requestByURL(request.url)

		# No content, whitespace-only content and the empty document all count as failure.
		if content is None or not content.strip() or content == self._EMPTY_PAGE:
			logging.debug("[PID:%s] PhantomJS Request failed!", os.getpid())
			return HtmlResponse(request.url, encoding='utf-8', status=503, body='')

		logging.debug("[PID:%s]PhantomJS Request success!", os.getpid())
		return HtmlResponse(request.url, encoding='utf-8', status=200, body=content)
Ejemplo n.º 2
0
	def __init__(self):
		"""Set up the collaborators this middleware instance holds.

		Stores the object returned by ``HttpProxyFactory.getHttpProxyFactory()``
		as ``proxyFactory`` and a new ``PhantomJSService`` as
		``phantomJSService``.
		"""
		factory = HttpProxyFactory.getHttpProxyFactory()
		self.proxyFactory = factory
		# The service is created after the factory, in the same order as before.
		service = PhantomJSService()
		self.phantomJSService = service