def testProxyClass():
    import json

    from helper.proxy import Proxy

    proxy = Proxy("127.0.0.1:8080")
    print(proxy.to_json)
    proxy.source = "test"
    proxy_str = json.dumps(proxy.to_dict, ensure_ascii=False)
    print(proxy_str)
    print(Proxy.createFromJson(proxy_str).to_dict)
def testExist():
    client = SqlClient()
    sql = 'select url from proxy where tag="b47w"'
    urls = [i[0] for i in client.engine.execute(sql).fetchall()]
    print(urls)
    proxy_queue = Queue()
    for url in urls:
        proxy = Proxy(url)
        proxy.tag = 'b47w'
        proxy_queue.put(proxy)
    checker = Checker('raw', proxy_queue, "thread_01")
    checker.start()
    checker.join()
def freeProxy20():
    source = 'premproxy.com'
    urls = [
        'https://premproxy.com/list/ip-port/1.htm',
        'https://premproxy.com/list/ip-port/2.htm',
        'https://premproxy.com/list/ip-port/3.htm',
    ]
    proxies = {'http': MAINPROXY, 'https': MAINPROXY}
    for url in urls:
        tree = WebRequest().get(url, proxies=proxies).tree
        if tree is None:
            return None
        ret = tree.xpath('//ul[@id="ipportlist"]/li')
        for r in ret:
            try:
                # drop the trailing character from the raw text node
                ip = r.xpath('./li/text()')[0][:-1]
                # ip_mask = re.search('(?:")(.*)(?:")',
                #                     ip_script).groups()[0]
                # ip = re.search('(?:>)([0-9\.]+)(?:<)',
                #                unquote(ip_mask, 'utf8')).groups()[0]
                port = r.xpath('./li/span/text()')[0]
                protocol = 'https'
                yield Proxy(f'{protocol}://{ip}:{port}', source=source)
            except Exception as e:
                print(type(e), e)
def getAll(self):
    """
    get all proxy from pool as Proxy list
    :return:
    """
    proxies_dict = self.db.getAll()
    return [Proxy.createFromJson(value) for value in proxies_dict.values()]
def testRedisClient():
    from db.dbClient import DbClient
    from helper.proxy import Proxy

    uri = "redis://:[email protected]:6379"
    db = DbClient(uri)
    db.changeTable("use_proxy")
    proxy = Proxy.createFromJson(
        '{"proxy": "27.38.96.101:9797", "fail_count": 0, "region": "", "type": "",'
        ' "source": "freeProxy03", "check_count": 0, "last_status": "", "last_time": ""}'
    )

    print("put: ", db.put(proxy))
    print("get: ", db.get())
    print("exists: ", db.exists("27.38.96.101:9797"))
    print("exists: ", db.exists("27.38.96.101:8888"))
    print("pop: ", db.pop())
    print("getAll: ", db.getAll())
    print("getCount: ", db.getCount())
def freeProxy16():
    proxies = {'http': MAINPROXY, 'https': MAINPROXY}
    source = 'free-proxy.cz'
    urls = [
        'http://free-proxy.cz/en/proxylist/country/all/socks5/ping/all',
        'http://free-proxy.cz/en/proxylist/country/all/socks5/ping/all/2',
        'http://free-proxy.cz/en/proxylist/country/all/socks5/ping/all/3',
        'http://free-proxy.cz/en/proxylist/country/all/socks5/ping/all/4',
        'http://free-proxy.cz/en/proxylist/country/all/socks5/ping/all/5',
    ]
    for url in urls:
        r = WebRequest().get(url, proxies=proxies)
        if r.response.status_code == 200:
            ret = r.tree
            for tr in ret.xpath('//table[@id="proxy_list"]//tr')[1:]:
                try:
                    # the IP is hidden as a Base64 string inside a <script> tag
                    ip_script = tr.xpath('./td[1]/script/text()')[0]
                    ip_base64 = re.search(r'(?:")([\w=]+)(?:")', ip_script).groups()[0]
                    ip = base64.b64decode(ip_base64).decode('utf8')
                    port = tr.xpath('./td[2]/span/text()')[0]
                    protocol = ''.join(tr.xpath('./td[3]/small/text()'))
                    yield Proxy(f'{protocol}://{ip}:{port}', source=source)
                except Exception as e:
                    print(e)
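# A minimal, self-contained sketch of the de-obfuscation step used by
# freeProxy16 above: free-proxy.cz embeds each address as a Base64 string
# inside a <script> block, so the parser lifts the quoted token out with a
# regex and decodes it. The function name and sample script text below are
# hypothetical, for illustration only.
def demoFreeProxyCzDecode():
    import base64
    import re

    ip_script = 'document.write(Base64.decode("MTI3LjAuMC4x"))'
    ip_base64 = re.search(r'(?:")([\w=]+)(?:")', ip_script).groups()[0]
    print(base64.b64decode(ip_base64).decode('utf8'))  # -> 127.0.0.1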
def getAll(self, https=False):
    """
    get all proxy from pool as Proxy list
    :return:
    """
    proxies = self.db.getAll(https)
    return [Proxy.createFromJson(proxy_str) for proxy_str in proxies]
def _runProxyFetch():
    proxy_queue = Queue()
    for proxy in runFetcher():
        proxy_queue.put(Proxy(proxy).to_json)
    runChecker("raw", proxy_queue)
def update_fail_count(self, proxy_str):
    proxy = self.db.get(proxy_str)
    if not proxy:
        return None
    proxy = Proxy.createFromJson(proxy)
    proxy.fail_count += 1
    self.db.update(proxy)
    return proxy
def get(self, https=False):
    """
    return a proxy
    Args:
        https: True/False
    Returns:
    """
    proxy = self.db.get(https)
    return Proxy.createFromJson(proxy) if proxy else None
def _runProxyFetch():
    proxy_queue = Queue()
    proxy_handler = ProxyHandler()
    # fetch again only when the pool holds fewer proxies than poolSizeMin
    if proxy_handler.db.getCount() < proxy_handler.conf.poolSizeMin:
        for proxy in runFetcher():
            proxy_queue.put(Proxy(proxy).to_json)
    runChecker("raw", proxy_queue)
def getByTag(self, tag):
    """
    return a useful proxy by tag
    :return:
    """
    proxy = self.db.getByTag(tag)
    if proxy:
        return Proxy.createFromJson(proxy)
    return None
def get(self):
    """
    return a useful proxy
    :return:
    """
    proxy = self.db.get()
    if proxy:
        return Proxy.createFromJson(proxy)
    return None
def pop(self):
    """
    return and delete a useful proxy
    :return:
    """
    proxy = self.db.pop()
    if proxy:
        return Proxy.createFromJson(proxy)
    return None
def testMysqlClient():
    db = SqlClient()
    proxy = Proxy('socks://127.0.0.1:1000', tag='test')
    # insert
    # db.put(proxy)
    # fetch one
    got = db.get('default')
    print(got)
def getAll(self, tag=None):
    """
    return all proxies as a Proxy list; use changeTable to specify the hash name
    :return:
    """
    from sqlalchemy import text

    sql = 'select url, score, proxy_type, tag from proxy'
    params = {}
    if tag is not None:
        # bind the tag as a query parameter instead of formatting it into
        # the statement, so a hostile tag value cannot inject SQL
        sql += ' where tag = :tag'
        params['tag'] = tag
    df = pd.read_sql(text(sql), self.engine, params=params)
    proxies = []
    for _, row in df.iterrows():
        proxies.append(Proxy.createFromJson(row.to_json()))
    return proxies
def run(self):
    self.log.info("ProxyFetch - {func}: start".format(func=self.fetch_source))
    try:
        for proxy in self.fetcher():
            self.log.info('ProxyFetch - %s: %s ok' % (self.fetch_source, proxy.ljust(23)))
            proxy = proxy.strip()
            if proxy in self.proxy_dict:
                self.proxy_dict[proxy].add_source(self.fetch_source)
            else:
                self.proxy_dict[proxy] = Proxy(proxy, source=self.fetch_source)
    except Exception as e:
        self.log.error("ProxyFetch - {func}: error".format(func=self.fetch_source))
        self.log.error(str(e))
def testRedisClient():
    from db.dbClient import DbClient
    from helper.proxy import Proxy

    uri = "redis://:[email protected]:6379"
    db = DbClient(uri)
    db.changeTable("use_proxy")
    proxy = Proxy.createFromJson(
        '{"proxy": "27.38.96.101:9797", "fail_count": 0, "region": "", "type": "",'
        ' "source": "freeProxy03", "check_count": 0, "last_status": "", "last_time": ""}')

    # print("put: ", db.put(proxy))
    # print("put: ", db.putTag(tag='test', proxy='1238:7'))
    # print("del: ", db.deleteTag(tag='test', proxy='1234'))
    print("get: ", db.getByTag(tag='test'))
def testProxyClass1():
    import json

    from helper.proxy import Proxy

    proxy = Proxy("https://127.0.0.1:8080")
    print(proxy.to_json)
    proxy.score = 10
    proxy.proxy_type = "test"
    proxy.tag = "高匿"  # "高匿" = elite / high-anonymity
    proxy_str = json.dumps(proxy.to_dict, ensure_ascii=False)
    print(proxy_str)
    print(Proxy.createFromJson(proxy_str).to_dict)
def run(self):
    self.log.info("ProxyCheck - {} : start".format(self.name))
    while True:
        try:
            proxy_json = self.queue.get(block=False)
        except Empty:
            self.log.info("ProxyCheck - {} : complete".format(self.name))
            break
        proxy = Proxy.createFromJson(proxy_json)
        proxy = proxyCheck(proxy)
        if self.type == "raw":
            # raw queue: keep proxies that pass and are not already pooled
            if proxy.last_status:
                if self.proxy_handler.exists(proxy):
                    self.log.info('ProxyCheck - {} : {} exists'.format(
                        self.name, proxy.proxy.ljust(23)))
                else:
                    self.log.info('ProxyCheck - {} : {} success'.format(
                        self.name, proxy.proxy.ljust(23)))
                    self.proxy_handler.put(proxy)
            else:
                self.log.info('ProxyCheck - {} : {} fail'.format(
                    self.name, proxy.proxy.ljust(23)))
        else:
            # use queue: update on success, delete once fail_count exceeds the cap
            if proxy.last_status:
                self.log.info('ProxyCheck - {} : {} pass'.format(
                    self.name, proxy.proxy.ljust(23)))
                self.proxy_handler.update(proxy)
            else:
                if proxy.fail_count > self.conf.maxFailCount:
                    self.log.info(
                        'ProxyCheck - {} : {} fail, count {} delete'.format(
                            self.name, proxy.proxy.ljust(23), proxy.fail_count))
                    self.proxy_handler.delete(proxy)
                else:
                    self.log.info(
                        'ProxyCheck - {} : {} fail, count {} keep'.format(
                            self.name, proxy.proxy.ljust(23), proxy.fail_count))
                    self.proxy_handler.update(proxy)
        self.queue.task_done()
def freeProxy17():
    source = 'www.proxynova.com'
    urls = [
        'https://www.proxynova.com/proxy-server-list/elite-proxies/',
    ]
    proxies = {'http': MAINPROXY, 'https': MAINPROXY}
    for url in urls:
        tree = WebRequest().get(url, proxies=proxies).tree
        if tree is None:
            return None
        ret = tree.xpath('//*[@id="tbl_proxy_list"]/tbody/tr')
        for r in ret:
            try:
                # the IP sits inside a single-quoted string in a <script> tag
                ip_script = r.xpath('./td[1]/abbr/script/text()')[0]
                ip = re.search(r"(?:')(.+)(?:')", ip_script).groups()[0]
                port = r.xpath('./td[2]/text()')[0].strip()
                protocol = 'https'
                yield Proxy(f'{protocol}://{ip}:{port}', source=source)
            except Exception as e:
                print(e)
def run(self):
    """
    fetch proxy with proxyFetcher
    :return:
    """
    proxy_dict = dict()
    self.log.info("ProxyFetch : start")
    for fetch_source in self.conf.fetchers:
        self.log.info("ProxyFetch - {func}: start".format(func=fetch_source))
        fetcher = getattr(ProxyFetcher, fetch_source, None)
        if not fetcher:
            self.log.error("ProxyFetch - {func}: class method does not exist!".format(func=fetch_source))
            continue
        if not callable(fetcher):
            self.log.error("ProxyFetch - {func}: must be a class method".format(func=fetch_source))
            continue
        try:
            for proxy in fetcher():
                self.log.info('ProxyFetch - %s: %s ok' % (fetch_source, proxy.ljust(23)))
                proxy = proxy.strip()
                if proxy in proxy_dict:
                    proxy_dict[proxy].add_source(fetch_source)
                else:
                    proxy_dict[proxy] = Proxy(proxy, source=fetch_source)
        except Exception as e:
            self.log.error("ProxyFetch - {func}: error".format(func=fetch_source))
            self.log.error(str(e))
    self.log.info("ProxyFetch - all complete!")
    # pre-validate before handing proxies on to the checker
    for proxy in proxy_dict.values():
        if DoValidator.preValidator(proxy.proxy):
            yield proxy
def freeProxy21():
    source = 'www.proxyranker.com'
    urls = [
        'https://www.proxyranker.com/china/list/',
        'https://www.proxyranker.com/china/list-2/',
        'https://www.proxyranker.com/china/list-3/',
        'https://www.proxyranker.com/china/list-4/',
    ]
    proxies = {'http': MAINPROXY, 'https': MAINPROXY}
    for url in urls:
        tree = WebRequest().get(url, proxies=proxies).tree
        if tree is None:
            return None
        ret = tree.xpath('//div[@class="bl"]//tr')[1:]
        # skip the table's last row as well
        for r in ret[:-1]:
            try:
                ip = r.xpath('./td[1]/text()')[0]
                port = r.xpath('./td[4]/span/text()')[0]
                protocol = 'https'
                yield Proxy(f'{protocol}://{ip}:{port}', source=source)
            except Exception as e:
                print(type(e), e)
def testRedisClient():
    from db.dbClient import DbClient
    from helper.proxy import Proxy

    uri = "redis://:[email protected]:6379"
    db = DbClient(uri)
    db.changeTable("use_proxy")
    proxy = Proxy.createFromJson(
        '{"proxy": "118.190.79.36:8090", "https": false, "fail_count": 0, "region": "",'
        ' "anonymous": "", "source": "freeProxy14", "check_count": 4, "last_status": true,'
        ' "last_time": "2021-05-26 10:58:04"}')

    print("put: ", db.put(proxy))
    print("get: ", db.get(https=None))
    print("exists: ", db.exists("27.38.96.101:9797"))
    print("exists: ", db.exists("27.38.96.101:8888"))
    print("pop: ", db.pop(https=None))
    print("getAll: ", db.getAll(https=None))
    print("getCount: ", db.getCount())
def freeProxy19():
    source = 'www.freeproxylists.net'
    urls = [
        'http://www.freeproxylists.net/zh/?c=&pt=&pr=HTTPS&a%5B%5D=0&a%5B%5D=1&a%5B%5D=2&u=50',
    ]
    proxies = {'http': MAINPROXY, 'https': MAINPROXY}
    for url in urls:
        tree = WebRequest().get(url, proxies=proxies).tree
        if tree is None:
            return None
        ret = tree.xpath('//tr')[4:]
        for r in ret:
            try:
                # the IP is URL-encoded HTML inside a <script> tag:
                # unquote it, then pull the address out of the markup
                ip_script = r.xpath('./td[1]/script/text()')[0]
                ip_mask = re.search('(?:")(.*)(?:")', ip_script).groups()[0]
                ip = re.search(r'(?:>)([0-9.]+)(?:<)', unquote(ip_mask, 'utf8')).groups()[0]
                port = r.xpath('./td[2]/text()')[0]
                protocol = r.xpath('./td[3]/text()')[0]
                yield Proxy(f'{protocol}://{ip}:{port}', source=source)
            except Exception as e:
                print(type(e), e)
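# Companion sketch for freeProxy19 above: freeproxylists.net URL-encodes an
# HTML fragment inside the script call, so unquote() first restores the
# markup and a second regex lifts the dotted address from between the tags.
# The function name and sample script text below are hypothetical.
def demoFreeProxyListsDecode():
    import re
    from urllib.parse import unquote

    ip_script = 'IPDecode("%3Cspan%3E1.2.3.4%3C%2Fspan%3E")'
    ip_mask = re.search('(?:")(.*)(?:")', ip_script).groups()[0]
    ip = re.search(r'(?:>)([0-9.]+)(?:<)', unquote(ip_mask)).groups()[0]
    print(ip)  # -> 1.2.3.4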
def freeProxy18():
    source = 'spys.one'
    urls = [
        'https://spys.one/en/free-proxy-list/',
    ]
    # proxies = {'http': MAINPROXY, 'https': MAINPROXY}
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--proxy-server=' + MAINPROXY)
    client = webdriver.Chrome(options=chrome_options)
    for url in urls:
        client.get(url)
        trs = client.find_elements_by_xpath('//table/tbody/tr/td/table/tbody/tr')
        # skip the leading non-data rows
        for r in trs[3:]:
            try:
                tds = r.find_elements_by_tag_name('td')
                ip = tds[0].find_element_by_xpath('./font').text
                protocol = tds[1].text.split(' ')[0]
                yield Proxy(f'{protocol}://{ip}', source=source)
            except Exception as e:
                print(e)
    # quit() tears down the browser and the chromedriver process
    client.quit()
def delete():
    proxy = request.args.get('proxy')
    status = proxy_handler.delete(Proxy(proxy))
    return {"code": 0, "src": status}
def _runProxyFetch():
    # proxy_queue = Queue()
    proxy_handler = ProxyHandler()
    for proxy in runFetcher():
        proxy_handler.db.put(Proxy(proxy))