#!/usr/bin/python
# Scrape the mrhinkydink.com proxy list pages and pass every remote that
# WebParser yields to WebParser.scan().
from Harvester import WebParser

# URL template with a single "%s" slot.
# NOTE(review): presumably WebParser fills the slot from `pages` -- confirm
# against the Harvester module.
url = "http://www.mrhinkydink.com/proxies%s.htm"

# Two capture groups: a dotted-quad IP inside one <td> cell, followed
# (after optional newlines) by a 2-5 digit port inside the next <td> cell.
regex = r'\<td\>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\</td\>\n*\<td\>(\d{2,5})\</td\>'

# NOTE(review): how WebParser interprets this tuple (including the leading
# None) is not visible from this script -- verify before changing it.
pages = (None, 2, 5)

wp = WebParser(url, regex, pages)

# Scan each remote produced by the parser, one at a time.
for remote in wp.remotes():
    wp.scan(remote)
#!/usr/bin/python
# Scrape the xroxy.com HTTP proxy list and pass every remote that WebParser
# yields to WebParser.scan().
from Harvester import WebParser

# URL template with a "%d" slot in the `pnum` query parameter.
# NOTE(review): presumably WebParser substitutes page numbers derived from
# `pages` -- confirm against the Harvester module.
url = "http://www.xroxy.com/proxylist.php?port=&type=All_http&pnum=%d"

# The IP and the port are captured by two separate patterns here, rather
# than by one pattern with two groups.
ip_regex = r"View this Proxy details.*\>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"
port_regex = r"Select proxies with port number (\d{2,5})"
patterns = (ip_regex, port_regex)

# NOTE(review): assumed to be a page count or upper page bound; the exact
# meaning is defined by WebParser.
pages = 250

wp = WebParser(url, patterns, pages)

# Scan each remote produced by the parser, one at a time.
for remote in wp.remotes():
    wp.scan(remote)
#!/usr/bin/python
# Scrape the free-proxy.cz proxy list and pass every remote that WebParser
# yields to WebParser.scan().
from Harvester import WebParser

# URL template with a trailing "%d" slot.
# NOTE(review): presumably WebParser substitutes page numbers derived from
# `pages` -- confirm against the Harvester module.
url = "http://free-proxy.cz/en/proxylist/main/%d"

# Sample of the markup the two patterns below are written against:
#   </div> 117.175.231.117</td>
#   <td><span class="fport">8123</span></td>
ip_regex = r"\<\/div\> (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\<\/td\>"
port_regex = r'\<span\ class\=\"fport\"\>(\d{2,5})\<\/span\>'
patterns = (ip_regex, port_regex)

# NOTE(review): assumed to be a page count or upper page bound; the exact
# meaning is defined by WebParser.
pages = 179

wp = WebParser(url, patterns, pages)

# Scan each remote produced by the parser, one at a time.
for remote in wp.remotes():
    wp.scan(remote)