    def run(self):
        while self.isRun:
            try:
                do, args = self.work_queue.get(block=False)
                do(args)
                self.work_queue.task_done()
            except Queue.Empty:  # stdlib Queue module, imported at file top
                # Queue drained; let the worker thread finish.
                break
            except Exception:
                # Any other failure is logged before the worker stops.
                error_log(trace_back())
                break
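    # A minimal usage sketch for the worker loop above, assuming this class
    # subclasses threading.Thread and its __init__ stores the queue as
    # self.work_queue and sets self.isRun = True (names not in this excerpt
    # are assumptions):
    #
    #   work_queue = Queue.Queue()
    #   work_queue.put((handle_url, "http://example.com/"))  # (callable, arg)
    #   worker = Worker(work_queue)   # hypothetical constructor
    #   worker.start()
    #   work_queue.join()             # blocks until every task_done() call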
    def urlopen(self, url, method='get', data={}, info={}, timeout=30):
        _url = ''
        if method == "post":
            # Keep the bare URL for the POST request itself; the query string
            # is appended to `url` only so it can serve as the cache key.
            query = urllib.urlencode(data)
            _url = url
            url = url + "?" + query
        if self.cache:
            if self.checkUrlCacheExits(url):
                return self.getCacheContent(url)
        if self.opener is None:
            self.setOpener()
        # Pre-register the cache record; it is filled in after the response
        # arrives, or deleted again if the request fails.
        v = {}
        for k in info:
            v[k] = info[k]
        v['url'] = url
        v['local'] = self.getUrlCacheFile(url)
        v['headers'] = ''
        v['cache'] = False
        v['body'] = ''
        self.setUrlCache(url, v)
        try:
            if method == "get":
                req = urllib2.Request(url)
            else:
                req = urllib2.Request(_url, query)
            req.add_header("User-Agent", self.user_agent)
            r = self.opener.open(req, timeout=timeout)
        except urllib2.HTTPError:
            self.delUrlCache(url)
            error_log(url + "\n" + trace_back() + "\n")
            return None
        except Exception:
            self.delUrlCache(url)
            error_log(url + "\n" + trace_back() + "\n")
            return None
        self.saveCookie()
        v['headers'] = dict(r.headers)
        v['body'] = r.read()
        self.setUrlCache(url, v)
        r.close()
        return v

    def setUrlCache(self, url, v, level=3):
        #if self.cache == False:
        #    return
        vv = {}