Example #1
def start(key, sort_type):
    createfile(key)
    proxies = Proxies().main()
    # proxies={}
    for page_num in range(1, 15):
        url_list = []  # initialized so the except handler below can check it safely
        try:
            url_list = search(key, sort_type, page_num, proxies)
            # print('****',url_list)
            for url in url_list:
                link(key, url, proxies)
                time.sleep(random.uniform(1, 3))
        except Exception as e:
            print('* error *', e)
            if len(url_list) == 0:
                proxies = Proxies().main()
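Example #1 swaps in a fresh proxy pool whenever a page yields no results. Below is a minimal sketch of the same rotate-on-failure idea using the requests library; fetch_proxy_pool, the sample proxy addresses, and the retry count are hypothetical stand-ins, not part of the original project.

import random
import time

import requests


def fetch_proxy_pool():
    # Hypothetical helper: return 'ip:port' strings from whatever proxy source is available.
    return ['203.0.113.10:8080', '198.51.100.7:3128']


def fetch_with_rotation(url, retries=5):
    pool = fetch_proxy_pool()
    for _ in range(retries):
        proxy = random.choice(pool)
        try:
            resp = requests.get(
                url,
                proxies={'http': f'http://{proxy}', 'https': f'http://{proxy}'},
                timeout=10)
            resp.raise_for_status()
            return resp.text
        except requests.RequestException as e:
            print('* error *', e)
            pool = fetch_proxy_pool()  # rotate to a fresh pool, as in the example above
            time.sleep(random.uniform(1, 3))
    return None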
Example #2
def main():
    loop = asyncio.get_event_loop()
    with aiohttp.ClientSession(loop=loop) as session:
        proxies = Proxies(
            session, 30,
            'http://gimmeproxy.com/api/getProxy?protocol=http&supportsHttps=true&maxCheckPeriod=3600'
        )
        area = Area('https://www.freecycle.org/browse/UK/London')
        groups = loop.run_until_complete(
            area.get_groups(session, proxies, SEARCH_TERMS, FROM, TO))
    display(groups)
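Recent aiohttp releases removed the loop= argument and plain with-statement support on ClientSession, so Example #2 only runs against older versions. Below is a minimal sketch of the same wiring under current aiohttp/asyncio, assuming Proxies, Area, display, SEARCH_TERMS, FROM, and TO behave as in the example above.

import asyncio

import aiohttp


async def main():
    # The session is opened inside the coroutine with async with, bound to the running loop.
    async with aiohttp.ClientSession() as session:
        proxies = Proxies(
            session, 30,
            'http://gimmeproxy.com/api/getProxy?protocol=http&supportsHttps=true&maxCheckPeriod=3600'
        )
        area = Area('https://www.freecycle.org/browse/UK/London')
        groups = await area.get_groups(session, proxies, SEARCH_TERMS, FROM, TO)
    display(groups)


if __name__ == '__main__':
    asyncio.run(main())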
Example #3
    def __init__(self,
                 dest: str = 'en',
                 src: str = 'auto',
                 proxy_size=20,
                 th_size=0):
        self.dest = dest
        self.src = src

        self.proxies = Proxies(proxy_len=proxy_size)
        self.proxies.verify_proxies()
        self.loading_index = 0
        self.loading_len = 0
        self.th_size = th_size
Example #4
def main():
    if len(sys.argv) < 2:
        print("Usage: " + sys.argv[0] + " <board>")
        return

    board = sys.argv[1]

    posts = {}
    try:
        posts = json.loads(open("data/posts_%s.json" % board).read())
    except Exception as e:
        pass

    regexps_like = [
        regex.split("\n")[0]
        for regex in open("data/regexps_like").readlines()
    ]
    regexps_dislike = [
        regex.split("\n")[0]
        for regex in open("data/regexps_dislike").readlines()
    ]
    comparator_dislike = Comparator(
        open("data/comparator.wasm", "rb").read(), [
            base64.b64decode(image.split("\n")[0])
            for image in open("data/images").readlines()
        ])
    checker = Checker(regexps_like, regexps_dislike, comparator_dislike)

    proxies = Proxies(
        [proxy.split("\n")[0] for proxy in open("data/proxies").readlines()])
    network = Network(proxies, 10)

    liker = Liker(board, checker, posts, network)

    network.start()
    network.join()
Example #5
    def __init__(self, proxy_file, proxy_bypass_percent, **kwargs):
        self.bypass_percent = int(proxy_bypass_percent)
        self.proxies = Proxies(proxy_file, **kwargs)
Example #6
    def __init__(self):
        self.proxies = Proxies()
Example #7
    def __init__(self, cache_file):
        if cache_file is None:
            cache_file = '/tmp/__proxy_list_cache.json'

        self.proxies = Proxies(cache_file)
Example #8
    run = True
    task = 0

    while run:
        try:
            # int() is what raises ValueError, so the conversion belongs inside the try block.
            task = int(show_task())
        except ValueError:
            print('Please enter a valid number')
            continue
        if task == 0:
            run = False
        elif task == 1:
            create_db()
        elif task == 2:
            Proxies().run()
        elif task == 3:
            data_dir = os.path.join(
                os.path.abspath(os.path.dirname(__file__)), 'data')
            while os.path.exists(data_dir):
                shutil.rmtree(data_dir)
            WeChatSogou().run_hy()
        elif task == 4:
            names = input('''
            ** Enter the official account(s) to crawl (multiple allowed, comma-separated):
            ''').strip('"')
            names = names.split(',')
            proxy = input('''
            ** Enter a proxy manually (ip:port, only one accepted; press Enter for automatic):
            ''')
            WeChatSogou().run_gzh(names=names, iproxy=proxy)
Example #9
#!/usr/bin/env python3

""""""
from logger import Logger
from proxies import Proxies
from scraper import Scraper
from spoofing import Headers


if __name__ == "__main__":
    headers = Headers()
    logger = Logger()
    proxies = Proxies(headers, logger)
    scraper = Scraper(headers, proxies, logger)
    scraper.scrape()