Beispiel #1
0
def crawl_proxies():
    """Fetch the goubanjia page and store every proxy parsed from it.

    The page at ``etc.goubanjia_url`` is loaded with ``get_html_to_tree``,
    passed to ``get_proxies_info``, and each resulting entry is inserted
    through a ``ProxiesIO`` created for ``etc.crawl_db``.
    """
    loginfo.info('crawl proxies in goubanjia')
    found = get_proxies_info(get_html_to_tree(etc.goubanjia_url))
    storage = proxy_io.ProxiesIO(db=etc.crawl_db)
    # Entries are inserted exactly as parsed; no validity check runs here.
    for entry in found:
        storage.insert_proxy(entry)
Beispiel #2
0
def crawl_proxies():
    """Crawl pages 1 through 19 of the 89ip listing and store the proxies.

    Before each page request the function sleeps for three seconds. Every
    page is fetched with ``get_html_to_tree`` from
    ``etc.s_89ip_url.format(page)``, parsed with ``get_proxies_info``, and
    each parsed entry is inserted into the crawl database.
    """
    loginfo.info('crawl proxies in n89ip')
    # One storage handle is enough for the whole crawl; it does not depend
    # on the page being processed, so it is created once up front.
    proxy_db = proxy_io.ProxiesIO(db=etc.crawl_db)
    for page in range(1, 20):
        # Pause between requests (presumably to avoid hammering the site).
        time.sleep(3)
        root = get_html_to_tree(etc.s_89ip_url.format(page))
        for proxy in get_proxies_info(root):
            proxy_db.insert_proxy(proxy)
def crawl_proxies(a=1, b=50):
    """Crawl a range of kuaidaili listing pages and store the proxies.

    Args:
        a: First page number to crawl (inclusive).
        b: Page number to stop at (exclusive), as in ``range(a, b)``.

    Before each page request the function sleeps for three seconds. Every
    page is fetched with ``get_html_to_soup`` from
    ``etc.s_kuaidaili_inha_url.format(page)``, parsed with
    ``get_proxies_info``, and each parsed entry is inserted into the crawl
    database.
    """
    loginfo.info('crawl proxies in kuaidaili')
    # One storage handle is enough for the whole crawl; it does not depend
    # on the page being processed, so it is created once up front.
    proxy_db = proxy_io.ProxiesIO(db=etc.crawl_db)
    for page in range(a, b):
        # Pause between requests (presumably to avoid hammering the site).
        time.sleep(3)
        soup = get_html_to_soup(etc.s_kuaidaili_inha_url.format(page))
        for proxy in get_proxies_info(soup):
            proxy_db.insert_proxy(proxy)
Beispiel #4
0
def crawl_proxies():
    """Run one mimvp API crawl and store the proxies it returns.

    Returns:
        ``True`` when the parsed response contained at least 20 proxies,
        ``False`` otherwise.
    """
    loginfo.info('crawl proxies in mimvp not in loop')
    storage = proxy_io.ProxiesIO(db=etc.crawl_db)
    found = get_proxies_info(get_html_to_json(etc.mimvp_api_url))
    for entry in found:
        storage.insert_proxy(entry)
    loginfo.info('crawl proxies in mimvp not in loop end')
    # Fewer than 20 results is reported to the caller as False.
    return len(found) >= 20
Beispiel #5
0
def crawl_proxies_loop(interval=60):
    """Poll the mimvp API forever, storing the proxies from each response.

    Each cycle fetches ``etc.mimvp_api_url`` with ``get_html_to_json``,
    parses it with ``get_proxies_info`` and inserts every entry into the
    crawl database. The function then sleeps for whatever is left of
    ``interval`` so that cycles start roughly ``interval`` seconds apart;
    if a cycle already took longer than that, the next one starts at once.

    Args:
        interval: Target number of seconds between the start of two
            consecutive cycles. Defaults to 60.

    This function never returns normally.
    """
    proxy_db = proxy_io.ProxiesIO(db=etc.crawl_db)
    while True:
        loginfo.info('crawl proxies in mimvp in loop')
        started = time.monotonic()
        j_dict = get_html_to_json(etc.mimvp_api_url)
        for proxy in get_proxies_info(j_dict):
            proxy_db.insert_proxy(proxy)
        # Take a single clock reading: comparing and subtracting two
        # separate readings could yield a negative value, which
        # time.sleep() rejects with ValueError. The monotonic clock also
        # keeps the pause unaffected by wall-clock adjustments.
        elapsed = time.monotonic() - started
        time.sleep(max(0.0, interval - elapsed))