def download(url):
    """Download ``url`` and return the response body as text.

    A direct request is tried first. If it raises, answers with a non-OK
    status, or yields fewer than 500 bytes, the request is retried up to
    ``config.RETRY_TIME`` times, each time through a proxy picked at random
    from a window of at most 30 ``IP`` records. The window starts at a random
    1-99 % offset into the result of ``session.query(IP)``.

    Returns:
        The decoded response text, or ``None`` when the chosen window holds
        no proxies or every proxied retry fails.
    """

    def fetch(proxies=None):
        # One attempt; a non-OK or suspiciously short (< 500 bytes) response
        # is treated as a failure so the caller moves on to the next attempt.
        r = requests.get(url=url, headers=config.get_header(),
                         timeout=config.TIMEOUT, proxies=proxies)
        r.encoding = chardet.detect(r.content)['encoding']
        if (not r.ok) or len(r.content) < 500:
            raise ConnectionError
        return r.text

    try:
        return fetch()
    except Exception:
        # Best-effort: any failure of the direct request falls through to
        # the proxied retries below.
        pass

    total = session.query(IP).count()
    # Floor division keeps the offset an int; true division would produce a
    # float on Python 3, which is not a valid slice index.
    start_index = random.randint(1, 99) * total // 100
    end_index = min(start_index + 30, total)
    proxylist = session.query(IP).all()[start_index:end_index]
    if not proxylist:
        return None

    count = 0  # number of failed proxied attempts so far
    while count < config.RETRY_TIME:
        proxy = random.choice(proxylist)
        address = "http://%s:%s" % (proxy.ip, proxy.port)
        try:
            return fetch({"http": address, "https": address})
        except Exception:
            count += 1
    return None
def check_and_store_ValidIP(ip, port, type, protocol):
    """Store a proxy as an ``IP`` record when ``check_ip(ip)`` accepts it.

    Args:
        ip: Proxy address; passed to ``check_ip`` and stored as ``ip``.
        port: Proxy port; stored as ``port``.
        type: Value convertible with ``int()``; stored as ``types``.
        protocol: Value convertible with ``int()``; stored as ``protocol``.

    Nothing is stored when ``check_ip(ip)`` is falsy. A failing commit is
    rolled back and otherwise ignored.
    """
    if not check_ip(ip):
        return

    # Convert before formatting: ``%d`` raises TypeError for string inputs,
    # which the int() conversions below indicate are expected here.
    types = int(type)
    protocol = int(protocol)
    print("ip: %s port: %s type: %d protocol: %d" % (ip, port, types, protocol))

    # NOTE(review): deriving the id from the current row count can collide
    # after deletions or concurrent inserts; a collision presumably surfaces
    # as a commit error that is silently rolled back below -- confirm whether
    # the column could auto-increment instead.
    proxy = IP(id=session.query(IP).count() + 1, ip=ip, port=port,
               types=types, protocol=protocol)
    session.add(proxy)
    try:
        session.commit()
    except Exception:
        # Best-effort insert: drop the pending row and keep the session usable.
        session.rollback()
def download(url):
    """Download ``url`` and return the response body as text.

    A direct request is tried first. If it raises, answers with a non-OK
    status, or yields fewer than 500 bytes, the request is retried up to
    ``config.RETRY_TIME`` times, each time through a proxy picked at random
    from the first 20 records returned by ``session.query(IP)``.

    Returns:
        The decoded response text, or ``None`` when no proxies are stored or
        every proxied retry fails.
    """

    def fetch(proxies=None):
        # One attempt; a non-OK or suspiciously short (< 500 bytes) response
        # is treated as a failure so the caller moves on to the next attempt.
        r = requests.get(url=url, headers=config.get_header(),
                         timeout=config.TIMEOUT, proxies=proxies)
        r.encoding = chardet.detect(r.content)['encoding']
        if (not r.ok) or len(r.content) < 500:
            raise ConnectionError
        return r.text

    try:
        return fetch()
    except Exception:
        # Best-effort: any failure of the direct request falls through to
        # the proxied retries below.
        pass

    # Slice from index 0: list indexes are 0-based, so starting at 1 skipped
    # the first stored proxy and left nothing to try when only one existed.
    proxylist = session.query(IP).all()[:20]
    if not proxylist:
        return None

    count = 0  # number of failed proxied attempts so far
    while count < config.RETRY_TIME:
        proxy = random.choice(proxylist)
        address = "http://%s:%s" % (proxy.ip, proxy.port)
        try:
            return fetch({"http": address, "https": address})
        except Exception:
            count += 1
    return None
def getfreeip(num):
    """Return at most ``num`` records from ``session.query(IP)``.

    The slice starts at index 0 so the first record is included and the
    result can actually reach ``num`` entries; slicing from 1 dropped the
    first record and capped the result at ``num - 1``.
    """
    return session.query(IP).all()[:num]