def parse(input_file, url, timeout, headless, bin_path, chrome_args):
    """Parse proxies from a local file or from a fetched URL and print them as JSON.

    Args:
        input_file: Open file object containing HTML to scan for proxies
            (presumably supplied by a click.File option — TODO confirm),
            or None/falsy to use `url` instead.
        url: URL of a page to fetch and scan (used when input_file is not given).
        timeout: Page-fetch timeout passed to the client.
        headless: Whether to run the browser headless.
        bin_path: Path to the Chrome/Chromium binary.
        chrome_args: Comma-separated extra Chrome arguments (with or without
            a leading '--').

    Raises:
        CliError: If neither input_file nor url is supplied, or the page
            could not be fetched.
    """
    parser = proxytools.parser.ProxyParser()
    # Normalise comma-separated Chrome args: drop empty entries and
    # ensure each argument carries a '--' prefix.
    chrome_args = [a if a.startswith('--') else '--{}'.format(a)
                   for a in chrome_args.split(',') if len(a) > 0]
    if input_file:
        # BUG FIX: the original read from an undefined name `html_file`,
        # which raised NameError; the supplied `input_file` is the file to read.
        html = input_file.read()
        proxies = [str(p) for p in parser.parse_proxies(html)]
    elif url:
        client = proxytools.Client()
        try:
            page = client.get_pages([url],
                                    timeout=timeout,
                                    headless=headless,
                                    bin_path=bin_path,
                                    chrome_args=chrome_args)[0]
            proxies = [str(p) for p in parser.parse_proxies(page.html)]
        except IndexError:
            # get_pages returned an empty list — nothing was fetched.
            raise CliError('Could not get page')
    else:
        raise CliError('Supply --input-file or --url')
    print(json.dumps(proxies, indent=4))
def get(test_url, headless, tab_concurrency, browser_concurrency, limit, selector, source_num, geo, bin_path, chrome_args, debug):
    """Get a working proxy and print the results as JSON.

    Args:
        test_url: URL used to verify that candidate proxies work.
        headless: Whether to run the browser headless.
        tab_concurrency: Number of tabs to test with concurrently.
        browser_concurrency: Number of browsers to run concurrently.
        limit: Maximum number of working proxies to return.
        selector: CSS selector that must be present for a page to count
            as successfully loaded.
        source_num: Number of proxy sources to scrape.
        geo: If truthy, annotate each result with its WHOIS country.
        bin_path: Path to the Chrome/Chromium binary.
        chrome_args: Comma-separated extra Chrome arguments.
        debug: Enable client debug output.
    """
    # Normalise comma-separated Chrome args: drop empty entries and
    # ensure each argument carries a '--' prefix.
    # BUG FIX: the original built the normalised list but never assigned
    # it back, so the raw split list was passed to the client.
    chrome_args = [a if a.startswith('--') else '--{}'.format(a)
                   for a in chrome_args.split(',') if len(a) > 0]
    # BUG FIX: debug was hardcoded to True, ignoring the `debug` parameter.
    client = proxytools.Client(debug=debug)
    results = client.get_proxies(test_url,
                                 headless=headless,
                                 tab_concurrency=tab_concurrency,
                                 browser_concurrency=browser_concurrency,
                                 limit=limit,
                                 selector=selector,
                                 source_num=source_num,
                                 bin_path=bin_path,
                                 chrome_args=chrome_args)
    if geo:
        wait = 1  # seconds between WHOIS requests, to avoid hammering the service
        for result in results:
            proxy = proxytools.proxy.Proxy.from_string(result['proxy'])
            country = proxy.country()
            result['country'] = country
            time.sleep(wait)
    print(json.dumps(results, indent=4))
def search(source_num, bin_path, chrome_args):
    """Scrape proxies from the web and print the proxy strings as JSON.

    Args:
        source_num: Number of proxy sources to scrape.
        bin_path: Path to the Chrome/Chromium binary.
        chrome_args: Comma-separated extra Chrome arguments (with or
            without a leading '--').
    """
    # Split the comma-separated Chrome args, skip empties, and make sure
    # every entry is '--'-prefixed.
    chrome_args = [
        item if item.startswith('--') else '--{}'.format(item)
        for item in chrome_args.split(',')
        if len(item) > 0
    ]
    client = proxytools.Client()
    found = client.search_proxies(source_num=source_num,
                                  bin_path=bin_path,
                                  chrome_args=chrome_args)
    print(json.dumps([str(proxy) for proxy in found], indent=4))
def sources(headless, num, bin_path, chrome_args):
    """Search Google for proxy source pages and print their URLs as JSON.

    Args:
        headless: Whether to run the browser headless.
        num: Number of source URLs to collect.
        bin_path: Path to the Chrome/Chromium binary.
        chrome_args: Comma-separated extra Chrome arguments (with or
            without a leading '--').
    """
    # Turn the comma-separated option into a clean list of '--'-prefixed
    # Chrome flags, dropping any empty entries.
    flags = []
    for raw in chrome_args.split(','):
        if not raw:
            continue
        flags.append(raw if raw.startswith('--') else '--{}'.format(raw))
    chrome_args = flags
    client = proxytools.Client()
    source_urls = client.get_source_urls(headless=headless,
                                         num=num,
                                         bin_path=bin_path,
                                         chrome_args=chrome_args)
    print(json.dumps(source_urls, indent=4))
def test(proxy, url, headless, browser_concurrency, selector, bin_path, chrome_args):
    """Test a proxy for a given URL and print the results as JSON.

    Args:
        proxy: The proxy to test.
        url: URL to load through the proxy.
        headless: Whether to run the browser headless.
        browser_concurrency: Number of browsers to run concurrently.
        selector: CSS selector that must be present for the page load to
            count as a success.
        bin_path: Path to the Chrome/Chromium binary.
        chrome_args: Comma-separated extra Chrome arguments.
    """
    # Normalise comma-separated Chrome args: drop empty entries and
    # ensure each argument carries a '--' prefix.
    # BUG FIX: the original built the normalised list but never assigned
    # it back (and then never used chrome_args at all).
    chrome_args = [a if a.startswith('--') else '--{}'.format(a)
                   for a in chrome_args.split(',') if len(a) > 0]
    client = proxytools.Client()
    # BUG FIX: bin_path and chrome_args were accepted but never forwarded;
    # pass them through like every other command does.
    # NOTE(review): assumes test_proxies accepts these kwargs the same way
    # get_proxies does — confirm against the client API.
    results = client.test_proxies([proxy], url,
                                  headless=headless,
                                  browser_concurrency=browser_concurrency,
                                  selector=selector,
                                  bin_path=bin_path,
                                  chrome_args=chrome_args)
    print(json.dumps(results, indent=4))