def start(key, sort_type):
    """Crawl up to 14 search-result pages for *key*, visiting every hit.

    Pulls a fresh proxy pool up front, then for each page fetches the
    result URLs and follows each one, sleeping 1-3 s between visits to
    stay polite. When a page yields no URLs the proxy pool is refreshed.

    :param key: search keyword passed through to ``search``/``link``.
    :param sort_type: sort order forwarded to ``search``.
    """
    # TODO(review): `reatefile` looks like a typo for `createfile` — confirm
    # against the rest of the project before renaming.
    reatefile(key)
    proxies = Proxies().main()
    for page_num in range(1, 15):
        # Reset each page so the emptiness check below is safe even when
        # search() raises before url_list is assigned (was a NameError).
        url_list = []
        try:
            url_list = search(key, sort_type, page_num, proxies)
            for url in url_list:
                link(key, url, proxies)
                # Random delay to avoid hammering the target site.
                time.sleep(random.uniform(1, 3))
        except Exception as e:
            print('* error *', e)
        # An empty page usually means the proxy got blocked — rotate.
        if len(url_list) == 0:
            proxies = Proxies().main()
def main():
    """Fetch London freecycle groups through a proxy pool and display them."""
    loop = asyncio.get_event_loop()
    # NOTE(review): aiohttp.ClientSession is an *async* context manager;
    # using plain `with` is deprecated and raises in modern aiohttp —
    # confirm the pinned aiohttp version before relying on this.
    with aiohttp.ClientSession(loop=loop) as session:
        # 30 presumably is a proxy-pool size or timeout — verify against
        # the Proxies constructor.
        proxies = Proxies(
            session, 30,
            'http://gimmeproxy.com/api/getProxy?protocol=http&supportsHttps=true&maxCheckPeriod=3600'
        )
        area = Area('https://www.freecycle.org/browse/UK/London')
        # Drive the async scrape to completion on the current loop.
        groups = loop.run_until_complete(
            area.get_groups(session, proxies, SEARCH_TERMS, FROM, TO))
        display(groups)
def __init__(self, dest: str = 'en', src: str = 'auto', proxy_size=20, th_size=0):
    """Configure translation endpoints and build a verified proxy pool.

    :param dest: target language code.
    :param src: source language code ('auto' to detect).
    :param proxy_size: how many proxies to request for the pool.
    :param th_size: worker-thread count hint.
    """
    self.dest = dest
    self.src = src
    self.th_size = th_size
    # Progress-indicator counters, reset before each batch.
    self.loading_index = 0
    self.loading_len = 0
    # Build the pool last and verify it immediately so the instance
    # starts with usable proxies only.
    self.proxies = Proxies(proxy_len=proxy_size)
    self.proxies.verify_proxies()
def main():
    """Entry point: like/dislike posts on the board named in argv[1].

    Loads saved post state, the like/dislike regexps, the WASM image
    comparator and the proxy list from ``data/``, then runs the network
    worker to completion.
    """
    if len(sys.argv) < 2:
        print("Usage: " + sys.argv[0] + " <board>")
        return
    board = sys.argv[1]

    def _read_lines(path):
        # One entry per line, trailing newline stripped
        # (mirrors the original split("\n")[0] idiom).
        with open(path) as f:
            return [line.split("\n")[0] for line in f]

    # Previously-seen posts; start fresh when no saved state exists
    # or the file is unreadable/corrupt.
    posts = {}
    try:
        with open("data/posts_%s.json" % board) as f:
            posts = json.load(f)
    except (OSError, ValueError):
        pass

    regexps_like = _read_lines("data/regexps_like")
    regexps_dislike = _read_lines("data/regexps_dislike")

    with open("data/comparator.wasm", "rb") as f:
        comparator_wasm = f.read()
    comparator_dislike = Comparator(
        comparator_wasm,
        [base64.b64decode(image) for image in _read_lines("data/images")])

    checker = Checker(regexps_like, regexps_dislike, comparator_dislike)
    proxies = Proxies(_read_lines("data/proxies"))
    network = Network(proxies, 10)
    liker = Liker(board, checker, posts, network)
    network.start()
    network.join()
def __init__(self, proxy_file, proxy_bypass_percent, **kwargs):
    """Set up the proxy pool and how often requests skip it.

    :param proxy_file: path handed to the ``Proxies`` pool.
    :param proxy_bypass_percent: percentage of requests made without a
        proxy; coerced to ``int``.
    :param kwargs: forwarded verbatim to ``Proxies``.
    """
    self.proxies = Proxies(proxy_file, **kwargs)
    self.bypass_percent = int(proxy_bypass_percent)
def __init__(self):
    """Create the instance with a default-configured proxy pool."""
    self.proxies = Proxies()
def __init__(self, cache_file):
    """Build a proxy pool backed by *cache_file*.

    :param cache_file: path to the JSON proxy cache; ``None`` selects
        the default location under ``/tmp``.
    """
    path = '/tmp/__proxy_list_cache.json' if cache_file is None else cache_file
    self.proxies = Proxies(path)
# Interactive menu loop: keep prompting until the user picks 0 (quit).
run = True
task = 0
while run:
    # Parse the menu choice inside the try so a non-numeric entry is
    # caught here (previously int() ran outside the try, so the
    # except ValueError below could never fire).
    try:
        task = int(show_task())
    except ValueError:
        print('请输入正确的序号')
        continue
    if task == 0:
        run = False
    elif task == 1:
        create_db()
    elif task == 2:
        Proxies().run()
    elif task == 3:
        # Remove any stale data directory before a fresh category crawl.
        # os.path.join replaces the old + '\data' concatenation, whose
        # '\d' is an invalid escape and Windows-only separator.
        data_dir = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), 'data')
        while os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        WeChatSogou().run_hy()
    elif task == 4:
        names = input('''
        ** 请输入要爬取的公众号:(可输入多个)
        ''').strip('"')
        names = [name for name in names.split(',')]
        proxy = input('''
        ** 选择手动输入代理:
        (ip:port(目前只接收一个)|自动请回车)
        ''')
        WeChatSogou().run_gzh(names=names, iproxy=proxy)
#!/usr/bin/env python3
"""Run one scrape using spoofed headers, a logger and a proxy pool."""
from logger import Logger
from proxies import Proxies
from scraper import Scraper
from spoofing import Headers


def main():
    """Wire up the collaborators and kick off the scrape."""
    headers = Headers()
    logger = Logger()
    proxies = Proxies(headers, logger)
    scraper = Scraper(headers, proxies, logger)
    scraper.scrape()


if __name__ == "__main__":
    main()