def set_proxy(wait=15):
    """Fetch a fresh proxy and install it into Selenium's global Chrome
    capabilities so the *next* Chrome session routes traffic through it.

    Args:
        wait: Seconds to pause after fetching the proxy (default 15,
            preserving the original behavior — presumably to avoid
            hammering the proxy source; TODO confirm why 15s).

    Returns:
        The proxy string obtained from ``get_proxy()``, so callers can
        log or reuse it. (Previously nothing was returned; returning the
        value is backward-compatible.)

    Note:
        Mutating ``webdriver.DesiredCapabilities.CHROME`` is process-global
        legacy Selenium style: it affects every driver created afterwards,
        not just one session.
    """
    proxy = get_proxy()
    time.sleep(wait)
    webdriver.DesiredCapabilities.CHROME['proxy'] = {
        "httpProxy": proxy,
        "ftpProxy": proxy,
        "sslProxy": proxy,
        "noProxy": None,
        "proxyType": "MANUAL",
        "autodetect": False,
    }
    return proxy
"""Bootstrap script: configure a proxy and fake user agent, launch Chrome,
and poll until the proxied connection works (i.e. Chrome's error page —
identified by its "main-message" element — is no longer shown).
"""
import time
from random import choice

from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

from getproxy import get_proxy
from setproxy import set_proxy
from fakeuseragent import myfakeuseragent
from driverconfig import mydriverconfig

get_proxy()
set_proxy()
mydriverconfig()

driver = webdriver.Chrome(
    executable_path="C:/Users/Owner/chromedriver_win32/chromedriver.exe",
    chrome_options=myfakeuseragent())
driver.get('https://tipcracker.net')
driver.implicitly_wait(10)

# Chrome renders an error page containing an element with id "main-message"
# when the proxy/connection fails; its presence means "no internet".
# NOTE(review): if the page loads successfully, find_element_by_id raises
# NoSuchElementException rather than returning False — confirm whether the
# caller relies on that exception to mean "connection OK".
NoInternet = driver.find_element_by_id("main-message").is_enabled()
while NoInternet:
    time.sleep(10)
    # BUG FIX: the original loop never refreshed the condition (the
    # re-check was commented out), so it slept forever once the error
    # page was detected. Re-poll the error element each pass.
    NoInternet = driver.find_element_by_id("main-message").is_enabled()
            # (tail of a Crawl method whose opening lines are above this chunk)
            # Remaining fields of the signed token payload dict.
            "mT": [],
            "kT": [],
            "aT": [],
            "tT": [],
            "sign": self.url_encode(sign).decode()
        }
        # Build the prepay-list API URL from the hotel id, date range,
        # device uuid and the URL-encoded signed token.
        t = "https://ihotel.meituan.com/productapi/v2/prepayList?type=1&utm_medium=PC&version_name=" \
            "7.3.0&poiId=" + taken["poiId"] + "&start=" + \
            taken["start"] + "&end=" + taken["end"] + "&uuid=" + taken["uuid"] + "&_token=" + \
            self.url_encode(_tokon).decode()
        try:
            res = requests.get(t, headers=headers, timeout=2.0)  #proxies=self.proxy
            # Pull the integer after 'lowestPrice":' out of the raw response
            # body and convert from cents to yuan.
            data_list.append( int(re.findall(r'lowestPrice":[0-9]+', res.text)[0][13:]) / 100)  # lowest price (not actually useful)
        except Exception as e:
            # Best-effort crawl: log the error and the failing URL, then
            # signal failure with None so the caller can skip/retry.
            print(e)
            print(t)
            return None
        return data_list


if __name__ == '__main__':
    # Smoke test: take one proxy, crawl one city listing page and one
    # individual hotel page, and print the result.
    proxy = next(get_proxy())
    print(proxy)
    n = Crawl()
    n.proxy = proxy
    t = n.get_hotel_list('http://hotel.meituan.com/beijing/')
    t = n.crawl('http://hotel.meituan.com/1211661/')
    print(t)
- Multithreading, queue operations, resume-from-breakpoint
author:https://github.com/HANKAIluo 2018.3.18
"""
import threading
from queue import Queue
from crawl import Crawl
from datafile import Datafile
from getproxy import get_proxy
import time

# NOTE(review): rebinding the class name to an instance shadows the Datafile
# class for the rest of this module — presumably a deliberate singleton, but
# a distinct variable name would be clearer; confirm before renaming.
Datafile = Datafile()
proxies = get_proxy()  # proxy generator; consumed with next() below
thread_count = 40      # number of worker threads to spawn


class MasterThread:
    # (class body continues beyond this chunk)
    def __init__(self):
        # Crawl statistics shared across the run.
        self.count = {
            'count': 0,                    # total crawled
            'failed_count': 0,             # total failed crawls
            'sucess_count': 0,             # total successful crawls
            'start_time': time.asctime(),  # start time
            'end_time': 0,                 # end time (filled in at shutdown)
        }
        # Stop one minute after start. Minute-of-hour arithmetic wraps at 60 —
        # TODO confirm intended behavior near the top of the hour.
        self.endtime = time.localtime().tm_min + 1
        # Take one proxy from the module-level generator for this master.
        self.proxy = next(proxies)