Ejemplo n.º 1
0
def parse(input_file, url, timeout, headless, bin_path, chrome_args):
    """
    Parse proxies from a file or a URL and print them as a JSON list.

    :param input_file: readable file-like object holding the text to scan;
        when truthy it takes precedence over ``url``
    :param url: page to fetch and scan when no input file is given
    :param timeout: forwarded unchanged to ``Client.get_pages``
    :param headless: forwarded unchanged to ``Client.get_pages``
    :param bin_path: forwarded unchanged to ``Client.get_pages``
    :param chrome_args: comma-separated Chrome flags; empty entries are
        dropped and ``--`` is prepended to any flag that lacks it
    :raises CliError: if neither ``input_file`` nor ``url`` is supplied, or
        if fetching ``url`` yields no page
    """
    parser = proxytools.parser.ProxyParser()
    chrome_args = [
        arg if arg.startswith('--') else '--{}'.format(arg)
        for arg in chrome_args.split(',')
        if arg
    ]

    if input_file:
        # Read from the handle that was actually passed in (the previous
        # code referenced an undefined name here and always crashed).
        html = input_file.read()
    elif url:
        client = proxytools.Client()
        pages = client.get_pages([url],
                                 timeout=timeout,
                                 headless=headless,
                                 bin_path=bin_path,
                                 chrome_args=chrome_args)
        # Only the page lookup is guarded, so an IndexError raised while
        # parsing is not misreported as a fetch failure.
        try:
            page = pages[0]
        except IndexError:
            raise CliError('Could not get page')
        html = page.html
    else:
        raise CliError('Supply --input-file or --url')

    proxies = [str(p) for p in parser.parse_proxies(html)]
    print(json.dumps(proxies, indent=4))
Ejemplo n.º 2
0
def get(test_url, headless, tab_concurrency, browser_concurrency, limit,
        selector, source_num, geo, bin_path, chrome_args, debug):
    """
    Get working proxies for a URL and print the results as JSON.

    :param test_url: URL handed to ``Client.get_proxies``
    :param headless: forwarded unchanged to ``Client.get_proxies``
    :param tab_concurrency: forwarded unchanged to ``Client.get_proxies``
    :param browser_concurrency: forwarded unchanged to ``Client.get_proxies``
    :param limit: forwarded unchanged to ``Client.get_proxies``
    :param selector: forwarded unchanged to ``Client.get_proxies``
    :param source_num: forwarded unchanged to ``Client.get_proxies``
    :param geo: when truthy, add a ``'country'`` entry to every result
    :param bin_path: forwarded unchanged to ``Client.get_proxies``
    :param chrome_args: comma-separated Chrome flags; empty entries are
        dropped and ``--`` is prepended to any flag that lacks it
    :param debug: passed to the ``Client`` constructor
    """
    # Pass the normalized flags on; previously the cleaned list was built
    # but the raw split (including empty strings) was what got forwarded.
    chrome_args = [
        arg if arg.startswith('--') else '--{}'.format(arg)
        for arg in chrome_args.split(',')
        if arg
    ]
    # Honour the caller's debug setting instead of forcing it on.
    client = proxytools.Client(debug=debug)
    results = client.get_proxies(test_url,
                                 headless=headless,
                                 tab_concurrency=tab_concurrency,
                                 browser_concurrency=browser_concurrency,
                                 limit=limit,
                                 selector=selector,
                                 source_num=source_num,
                                 bin_path=bin_path,
                                 chrome_args=chrome_args)
    if geo:
        wait = 1  # seconds to pause between country (WHOIS) lookups
        for result in results:
            proxy = proxytools.proxy.Proxy.from_string(result['proxy'])
            result['country'] = proxy.country()
            time.sleep(wait)
    print(json.dumps(results, indent=4))
Ejemplo n.º 3
0
def search(source_num, bin_path, chrome_args):
    """
    Scrape proxies from the web and print them as a JSON list of strings.

    ``chrome_args`` is a comma-separated string of Chrome flags: empty
    entries are skipped and ``--`` is prepended to flags that lack it.
    ``source_num`` and ``bin_path`` are handed to ``Client.search_proxies``
    unchanged.
    """
    flags = []
    for raw in chrome_args.split(','):
        if not raw:
            continue
        flags.append(raw if raw.startswith('--') else '--{}'.format(raw))

    found = proxytools.Client().search_proxies(source_num=source_num,
                                               bin_path=bin_path,
                                               chrome_args=flags)
    print(json.dumps(list(map(str, found)), indent=4))
Ejemplo n.º 4
0
def sources(headless, num, bin_path, chrome_args):
    """
    Look up proxy source URLs and print them as JSON.

    ``chrome_args`` is a comma-separated string of Chrome flags: empty
    entries are skipped and ``--`` is prepended to flags that lack it.
    The remaining arguments are handed to ``Client.get_source_urls``
    unchanged.
    """
    normalized = [
        flag if flag.startswith('--') else '--{}'.format(flag)
        for flag in chrome_args.split(',')
        if flag
    ]
    found_urls = proxytools.Client().get_source_urls(headless=headless,
                                                     num=num,
                                                     bin_path=bin_path,
                                                     chrome_args=normalized)
    print(json.dumps(found_urls, indent=4))
Ejemplo n.º 5
0
def test(proxy, url, headless, browser_concurrency, selector, bin_path,
         chrome_args):
    """
    Test a proxy for a given URL and print the results as JSON.

    :param proxy: the single proxy to test; wrapped in a one-element list
    :param url: URL handed to ``Client.test_proxies``
    :param headless: forwarded unchanged to ``Client.test_proxies``
    :param browser_concurrency: forwarded unchanged to ``Client.test_proxies``
    :param selector: forwarded unchanged to ``Client.test_proxies``
    :param bin_path: forwarded unchanged to ``Client.test_proxies``
    :param chrome_args: comma-separated Chrome flags; empty entries are
        dropped and ``--`` is prepended to any flag that lacks it
    """
    chrome_args = [
        arg if arg.startswith('--') else '--{}'.format(arg)
        for arg in chrome_args.split(',')
        if arg
    ]
    client = proxytools.Client()
    # Previously the normalized flags and bin_path were accepted but never
    # used. NOTE(review): assumes Client.test_proxies takes bin_path and
    # chrome_args keywords like the other Client methods do -- confirm.
    results = client.test_proxies([proxy],
                                  url,
                                  headless=headless,
                                  browser_concurrency=browser_concurrency,
                                  selector=selector,
                                  bin_path=bin_path,
                                  chrome_args=chrome_args)
    print(json.dumps(results, indent=4))