def getIpstate():
    """Return the value stored under the Redis key "ipstate" as a string.

    When the key is missing (or holds a falsy value) it is initialised
    to 1 and "1" is returned. Other functions in this file use the
    result as part of the "ip<state>-" key prefix.
    """
    stored = RedisUtils.get("ipstate")
    if not stored:
        # First use: seed the counter so later reads find a value.
        RedisUtils.set("ipstate", 1)
        return "1"
    return "%s" % stored
def checkProxy(self):
    """Probe every proxy in ``self.proxyList`` and record the working ones.

    Calls ``RedisUtils.flush()`` first (presumably clears previously
    stored proxies -- confirm against RedisUtils). Each entry of
    ``self.proxyList`` is read as ``(host, port, ...)`` where both are
    strings. For every proxy, ``self.testUrl`` is fetched through it
    with ``self.timeout``; the proxy is accepted when the response body
    contains ``self.testStr`` and the round trip took less than 20
    seconds. Accepted proxies are appended to ``checkedProxyList`` as
    ``(host, port, seconds)`` and stored in Redis under ``ip1-<host>``
    with the value ``host:port``.

    Any failure while probing a proxy is logged and that proxy is
    skipped; the method never raises because of a single bad proxy.
    """
    RedisUtils.flush()
    cookies = urllib2.HTTPCookieProcessor()
    for proxy in self.proxyList:
        host, port = proxy[0], proxy[1]
        proxyHandler = urllib2.ProxyHandler(
            {"http": r'http://%s:%s' % (host, port)})
        opener = urllib2.build_opener(cookies, proxyHandler)
        opener.addheaders = [(
            'User-agent',
            'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0'
        )]
        t1 = time.time()
        try:
            req = opener.open(self.testUrl, timeout=self.timeout)
            try:
                result = req.read()
            finally:
                # Always release the connection, even if read() fails.
                req.close()
            timeused = time.time() - t1
            # Containment test: a match at offset 0 or 1 is still a match
            # (the previous ``find(...) > 1`` check rejected those).
            if self.testStr in result and int(timeused) < 20:
                checkedProxyList.append((host, port, timeused))
                RedisUtils.set("ip1-%s" % host, host + ":" + port)
                logger.info("ip %s" % host)
        except Exception as e:
            # Best-effort scan: one bad proxy must not abort the run, but
            # leave a trace. %r avoids encoding errors on odd messages.
            logger.info("proxy %s:%s skipped: %r" % (host, port, e))
            continue
def removeIp(ip):
    """Re-file a proxy under the next "ip<state>-" key prefix.

    ``ip`` is a proxies mapping such as ``{"http": "http://host:port"}``.
    The "http://" prefix is stripped, the host is taken as the text
    before the first ":", the key ``ip<state>-<host>`` is removed and
    ``ip<state+1>-<host>`` is set to ``host:port``, where ``<state>`` is
    the value returned by ``getIpstate()``.

    Nothing happens (and nothing is raised) when ``ip`` is empty or
    None, has no usable "http" entry, or the entry has no "host:port"
    form with a non-empty host.
    """
    if not ip:
        return
    value = ip.get("http")
    if not value:
        # No http proxy configured: nothing to re-file.
        return
    value = value.replace("http://", "")
    host, sep, _ = value.partition(":")
    if not sep or not host:
        # Missing ":" or empty host -- not a "host:port" address.
        return
    # Read the state once so the removed key and the new key are
    # guaranteed to be exactly one step apart.
    state = getIpstate()
    RedisUtils.remove("ip%s-%s" % (state, host))
    RedisUtils.set("ip%s-%s" % (int(state) + 1, host), value)
def process_proxy():
    """Pick a random proxy from the current pool and return it.

    Looks up the Redis keys matching ``ip<state>-`` (``<state>`` from
    ``getIpstate()``). If none are found, "ipstate" is incremented, the
    function sleeps for 1200 seconds and looks again under the new
    state. One key is then chosen at random, its value is wrapped as
    ``{"http": "http://<value>"}``, the proxy is passed to
    ``removeIp`` and the mapping is returned.

    Returns:
        dict with a single "http" entry.

    Raises:
        ValueError: the pool is still empty after waiting. (Previously
            this surfaced as an opaque "empty range" ValueError from
            ``random.randint``.)
    """
    # Seconds to wait before retrying. The log message below is built
    # from this value; it used to say "120s" while sleeping 1200s.
    wait_seconds = 1200

    currentKey = getIpstate()
    keys = RedisUtils.getKeys("ip%s-" % currentKey)
    if not keys:
        logger.info("代理池异常数量<1 sleepping %ds" % wait_seconds)
        RedisUtils.set("ipstate", int(currentKey) + 1)
        time.sleep(wait_seconds)
        keys = RedisUtils.getKeys("ip%s-" % getIpstate())
        if not keys:
            raise ValueError(
                "proxy pool still empty after waiting %ds" % wait_seconds)

    current = random.choice(keys)
    value = RedisUtils.get(current)
    proxies = {"http": r'http://%s' % value}
    removeIp(proxies)
    return proxies