コード例 #1
0
def set_proxy(delay=15):
    """Fetch a fresh proxy and install it into Chrome's desired capabilities.

    Mutates the module-global ``webdriver.DesiredCapabilities.CHROME`` in
    place, so it affects every Chrome driver created afterwards.

    Args:
        delay: Seconds to sleep after fetching the proxy before applying it
            (default 15, matching the original behavior).
    """
    # get_proxy() is a project-local helper; presumably returns "host:port" —
    # TODO confirm against getproxy.py.
    PROXY = get_proxy()
    time.sleep(delay)
    webdriver.DesiredCapabilities.CHROME['proxy'] = {
        "httpProxy": PROXY,
        "ftpProxy": PROXY,
        "sslProxy": PROXY,
        "noProxy": None,
        "proxyType": "MANUAL",
        "autodetect": False
    }
コード例 #2
0
from getproxy import get_proxy
from setproxy import set_proxy
from fakeuseragent import myfakeuseragent
from driverconfig import mydriverconfig
import time

from random import choice
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

# Acquire a proxy, configure the driver, and open the target site.
get_proxy()
set_proxy()
mydriverconfig()
driver = webdriver.Chrome(
    executable_path="C:/Users/Owner/chromedriver_win32/chromedriver.exe",
    chrome_options=myfakeuseragent())
driver.get('https://tipcracker.net')
driver.implicitly_wait(10)

# Chrome's network-error page exposes an element with id "main-message";
# if it is present/enabled, the proxy failed to load the target page.
NoInternet = driver.find_element_by_id("main-message").is_enabled()
while NoInternet:
    time.sleep(10)
    # BUGFIX: re-evaluate the condition each pass — the original never
    # updated NoInternet, so the loop could never terminate.
    # NOTE(review): find_element_by_id raises NoSuchElementException once
    # the error page is gone — confirm this matches the intended exit path.
    NoInternet = driver.find_element_by_id("main-message").is_enabled()
コード例 #3
0
            "mT": [],
            "kT": [],
            "aT": [],
            "tT": [],
            "sign": self.url_encode(sign).decode()
        }
        t = "https://ihotel.meituan.com/productapi/v2/prepayList?type=1&utm_medium=PC&version_name=" \
                "7.3.0&poiId=" + taken["poiId"] + "&start=" + \
        taken["start"] + "&end=" + taken["end"] + "&uuid=" + taken["uuid"] + "&_token=" + \
        self.url_encode(_tokon).decode()
        try:
            res = requests.get(t, headers=headers,
                               timeout=2.0)  #proxies=self.proxy
            data_list.append(
                int(re.findall(r'lowestPrice":[0-9]+', res.text)[0][13:]) /
                100)  # 最低价格(并没有神马卵用)
        except Exception as e:
            print(e)
            print(t)
            return None
        return data_list


if __name__ == '__main__':
    # Pull a single proxy from the generator and wire it into a crawler.
    proxy = next(get_proxy())
    print(proxy)
    crawler = Crawl()
    crawler.proxy = proxy
    # Warm-up listing call, then crawl one hotel page and show the result.
    result = crawler.get_hotel_list('http://hotel.meituan.com/beijing/')
    result = crawler.crawl('http://hotel.meituan.com/1211661/')
    print(result)
コード例 #4
0
- 多线程,队列操作,断点续传

author:https://github.com/HANKAIluo
2018.3.18
"""

import threading
from queue import Queue
from crawl import Crawl
from datafile import Datafile
from getproxy import get_proxy
import time

# NOTE(review): this rebinds the *class* name Datafile to an instance,
# shadowing the imported class — no second Datafile can be constructed
# afterwards. Works as a module-level singleton, but renaming the
# instance (e.g. `datafile`) would be clearer.
Datafile = Datafile()

# Shared proxy generator; workers pull fresh proxies via next(proxies).
proxies = get_proxy()

# Number of crawler worker threads to spawn.
thread_count = 40


class MasterThread:
    """Coordinate crawler worker threads and track aggregate progress.

    NOTE(review): the class continues past the visible excerpt; only the
    beginning of ``__init__`` is documented here.
    """

    def __init__(self):
        # Aggregate crawl statistics (shared mutable state).
        self.count = {
            'count': 0,  # total crawls attempted
            'failed_count': 0,  # total failed crawls
            'sucess_count': 0,  # total successful crawls (key typo kept — callers depend on it)
            'start_time': time.asctime(),  # run start timestamp
            'end_time': 0,  # set when the run finishes
        }
        # Current minute + 1 — presumably a per-minute checkpoint/deadline;
        # TODO confirm how endtime is consumed (not visible here).
        self.endtime = time.localtime().tm_min + 1
        # First proxy drawn from the module-level generator `proxies`.
        self.proxy = next(proxies)