def get_movie_url(url):
    """Map every episode title on a listing page to its player iframe URL.

    The listing page is fetched with ``ask_url`` and parsed with lxml to
    collect the episode links and captions.  Each episode page is then opened
    in headless Chrome, because the ``src`` of the ``fed-play-iframe`` element
    is read from the live DOM.

    Args:
        url: Address of the listing page.

    Returns:
        dict: ``{episode title: iframe src}``.  Titles and links are paired
        positionally; surplus entries on either side are ignored.
    """
    html = ask_url(url)
    doc = etree.HTML(html)

    # Anchor tags of every episode entry in the visible play list.
    episode_anchor = ('//div[@class="fed-play-item fed-drop-item fed-visible"]'
                      '//ul[@class="fed-part-rows"]/li/a')
    all_url = doc.xpath(episode_anchor + '/@href')
    all_title = doc.xpath(episode_anchor + '/text()')

    data_dict = {}

    c_service = Service('/usr/bin/chromedriver')
    c_service.command_line_args()
    c_service.start()
    browser = None
    try:
        option = webdriver.ChromeOptions()
        option.add_argument('--headless')
        option.add_argument('--no-sandbox')
        option.add_argument('--disable-dev-shm-usage')
        browser = webdriver.Chrome('/usr/bin/chromedriver', options=option)

        for title, href in zip(all_title, all_url):
            browser.get('https://kuyun.tv' + href)
            frame = browser.find_element_by_id('fed-play-iframe')
            data_dict[title] = frame.get_attribute('src')
    finally:
        # Always release the browser and the driver process, even when a
        # page fails to load or the iframe is missing.
        if browser is not None:
            browser.quit()
        c_service.stop()

    return data_dict
def punch(StudentId, Name):
    """Submit the daily check-in form on xsc.sicau.edu.cn for one student.

    The verification code shown in the ``code-box`` element is read as plain
    text and typed back into the form together with the student id and name.

    Args:
        StudentId: Value typed into the ``StudentId`` field.
        Name: Value typed into the ``Name`` field.
    """
    c_service = Service('/Users/wq//Downloads/chromedriver')
    c_service.command_line_args()
    c_service.start()
    driver = None
    try:
        driver = webdriver.Chrome('/Users/wq/Downloads/chromedriver')
        driver.get('http://xsc.sicau.edu.cn/SPCP')

        def by_id(element_id):
            # All form controls are located by their id via XPath.
            return driver.find_element_by_xpath(
                '//*[@id="{}"]'.format(element_id))

        def fill(element_id, value):
            field = by_id(element_id)
            field.click()
            field.send_keys(value)

        text = by_id('code-box').text

        fill('StudentId', StudentId)
        fill('Name', Name)
        fill('codeInput', text)
        by_id('Submit').click()
        by_id('platfrom2').click()

        try:
            by_id('ckCLS').click()
            driver.find_element_by_xpath(
                '//*[@id="SaveBtnDiv"]/button').click()
        except Exception:
            # The form controls were not usable; click the link of the
            # layer dialog instead.
            driver.find_element_by_xpath(
                '//*[@id="layui-layer1"]/div[3]/a').click()
    finally:
        # Release browser and driver process on success and on failure.
        if driver is not None:
            driver.quit()
        c_service.stop()
class ChromeDriverManager(DriverManager):
    """Driver manager that runs Chrome through a chromedriver service."""

    # Lazily created chromedriver Service; None until start_service() ran.
    __chservice = None

    @staticmethod
    def _driver_path():
        """Return the chromedriver path built from the configured BrowserPath.

        The part of this file's directory before ``\\base`` is used as the
        root, and the ``BrowserPath`` environment entry is appended to it.
        """
        cur_dir_path = os.path.dirname(os.path.realpath(__file__))
        root = cur_dir_path.split(sep='\\base')[0]
        return root + QEEnvironment.get_environment_dict().get('BrowserPath')

    def launch_browser(self):
        """Start a local Chrome instance and open the Cogmento UI."""
        chrome_option = Options()
        chrome_option.add_argument("--disable-infobars")
        chrome_option.add_argument("--start-maximized")
        chrome_option.add_argument("--disable-popup-blocking")
        chromedriver = self._driver_path()
        os.environ["webdriver.chrome.driver"] = chromedriver
        driver = webdriver.Chrome(chromedriver, options=chrome_option)
        driver.get('https://ui.cogmento.com/')

    def start_service(self):
        """Start the chromedriver service once; later calls are no-ops.

        Failures are reported with a traceback and do not propagate.
        """
        try:
            if self.__chservice is None:
                self.__chservice = Service(self._driver_path())
                self.__chservice.start()
                print('Service is started')
        except Exception:
            # print_exc() writes the traceback itself and returns None, so
            # its result must not be passed to print().
            traceback.print_exc()

    def stop_service(self):
        """Stop the chromedriver service if it exists and is reachable."""
        if self.__chservice is not None and self.__chservice.is_connectable():
            print('Stop service')
            self.__chservice.stop()

    def create_driver(self):
        """Create the remote driver against the running service.

        Sets ``self.driver`` (raw remote driver) and ``self.edriver`` (event
        firing wrapper), applies the configured implicit wait and opens the
        configured URL.  ``start_service()`` must have been called before.
        """
        chrome_option = webdriver.ChromeOptions()
        chrome_option.add_argument("--disable-infobars")
        chrome_option.add_argument("--start-maximized")
        chrome_option.add_argument("--disable-popup-blocking")
        capabilities = chrome_option.to_capabilities()
        self.driver = webdriver.Remote(self.__chservice.service_url,
                                       desired_capabilities=capabilities)
        self.edriver = EventFiringWebDriver(self.driver, EventListener())
        environment = QEEnvironment.get_environment_dict()
        self.edriver.implicitly_wait(environment.get('ImplicitWait'))
        self.edriver.get(environment.get('URL'))
def getcook():
    """Log in to the CCPS site with headless Chrome and return its cookies.

    Returns:
        list: The cookie dicts reported by the driver right after the login
        form was submitted, so other requests can reuse the session.
    """
    loginurl = 'http://113.57.169.227:8088/ccps/login.jsp'  # login page
    path = r'd:\chromedriver.exe'

    c_service = Service(path)
    c_service.command_line_args()
    c_service.start()
    driver = None
    try:
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        driver = webdriver.Chrome(executable_path=path, options=chrome_options)
        driver.get(loginurl)

        username_box = driver.find_element_by_id('wcode')
        username_box.clear()
        username_box.send_keys(u'WHBK100')

        password_box = driver.find_element_by_id('password')
        password_box.clear()
        password_box.send_keys(u'')

        submit = driver.find_element_by_link_text("登录")

        # Captcha handling is switched off: the list is always empty, so the
        # prompt below only runs if a captcha lookup is restored here.
        captcha = []
        if captcha:
            captcha_field = driver.find_element_by_id('captcha_field')
            text = input("请输入验证码:")
            captcha_field.send_keys(text)
        submit.click()

        cookies = driver.get_cookies()
        time.sleep(1)
    finally:
        # Release browser and driver process on success and on failure.
        if driver is not None:
            driver.quit()
        c_service.stop()

    return cookies
def get_urls(xingqi, A):
    """Collect the betfair odds URLs of all matches whose day matches.

    The score page is opened in headless Chrome, a tab is clicked and the
    page is scrolled to the bottom twice so lazily loaded rows appear.  The
    resulting HTML is parsed with lxml.

    Args:
        xingqi: Substring to look for in the text of each row's third cell.
        A: Dict updated in place with ``{url: third-cell text}`` for every
            matching row.

    Returns:
        list: The odds URLs of the matching rows, in page order.
    """
    # A separate Service is kept so the background chromedriver process can
    # be stopped explicitly once scraping is done.
    c_service = Service(
        r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')
    c_service.command_line_args()
    c_service.start()
    browser = None
    try:
        opt = Options()
        opt.add_argument('--headless')
        browser = webdriver.Chrome(chrome_options=opt)
        wait = WebDriverWait(browser, 10)  # wait up to 10 s for elements

        browser.get('http://www.qqshidao.com/index.php?c=home&a=bifen')
        time.sleep(3)
        submit = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, '//*[@id="app"]/div[6]/div/span[6]')))
        submit.click()
        time.sleep(2)

        # Scroll far past the end of the page; with many matches the first
        # scroll only triggers loading, so it is repeated once.
        js = 'var q=document.documentElement.scrollTop=100000'
        browser.execute_script(js)
        time.sleep(1)
        browser.execute_script(js)
        time.sleep(2)
        yuanma = browser.page_source
    finally:
        if browser is not None:
            browser.quit()
        c_service.stop()

    s = etree.HTML(yuanma)
    urls = []
    rows = s.xpath('//*[@id="app"]/div[7]/div/table/tbody/tr[@data-fid]')
    for each in rows:
        day_cells = each.xpath('./td[3]/text()')
        if not day_cells:
            # Row without text in the third cell: nothing to compare against.
            continue
        xingqiji = day_cells[0]
        if xingqi in xingqiji:
            fid = each.attrib['data-fid']
            url = 'http://www.qqshidao.com/index.php?c=odds&a=betfair&fid={}'.format(
                fid)
            A[url] = xingqiji
            urls.append(url)
    return urls
def download_by_webdriver(url, charset='utf-8', proxy=None, user_agent=None):
    """Render ``url`` in headless Chrome and return its HTML.

    Args:
        url: Address to load.
        charset: Encoding used to strip characters that cannot be
            represented; the page source is round-tripped through it.
        proxy: Optional value for Chrome's ``proxy-server`` switch.
        user_agent: Optional User-Agent; a desktop Chrome string is used
            when omitted.

    Returns:
        tuple: ``(page source, final URL)``, or ``(None, None)`` when
        anything goes wrong.  Errors are printed, never raised.
    """
    print("[download_by_webdriver]: begin download the link %s" % url)
    try:
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        options.add_argument('lang=zh_CN.UTF-8')
        if proxy:
            print("[download_by_webdriver]: use proxy %s" % proxy)
            options.add_argument('proxy-server=' + proxy)
        if user_agent:
            options.add_argument('user-agent=' + user_agent)
        else:
            options.add_argument(
                'user-agent=' +
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/71.0.3578.98 Safari/537.36')

        c_service = Service('/usr/local/bin/chromedriver')
        c_service.command_line_args()
        c_service.start()
        try:
            driver = webdriver.Chrome(chrome_options=options)
            try:
                driver.get(url)
                driver.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight);")
                # NOTE(review): both timeouts are set after the page was
                # requested, so they do not bound the load above.
                driver.implicitly_wait(10)
                driver.set_page_load_timeout(15)
                p_content = driver.page_source.encode(
                    charset, "ignore").decode(charset, 'ignore')
                current_url = driver.current_url
            finally:
                # Close the browser even when loading or reading failed.
                driver.quit()
        finally:
            c_service.stop()
    except Exception as e:
        print("[download_by_webdriver]:", e)
        p_content, current_url = None, None
    return p_content, current_url
def download_by_webdriver(url, charset='utf-8'):
    """Render ``url`` in headless Chrome and return its HTML.

    Args:
        url: Address to load.
        charset: Encoding used to strip characters that cannot be
            represented; the page source is round-tripped through it.

    Returns:
        tuple: ``(page source, final URL)``, or ``(None, None)`` when
        anything goes wrong.  Errors are printed, never raised.
    """
    print("[download_by_webdriver]: begin download the link %s" % url)
    try:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')

        c_service = Service('/usr/local/bin/chromedriver')
        c_service.command_line_args()
        c_service.start()
        try:
            driver = webdriver.Chrome(chrome_options=chrome_options)
            try:
                driver.get(url)
                driver.implicitly_wait(10)
                content = driver.page_source.encode(
                    charset, "ignore").decode(charset, 'ignore')
                current_url = driver.current_url
            finally:
                # Close the browser even when loading or reading failed.
                driver.quit()
        finally:
            c_service.stop()
    except Exception as e:
        print("[download_by_webdriver]:", e)
        content, current_url = None, None
    return content, current_url
class SeleniumExampleTest(unittest.TestCase):
    """Browser test that logs in to a WordPress admin page and logs out."""

    def setUp(self):
        """Start chromedriver and open a remote session against it."""
        print("Setting up")
        self.__app_url = "http://192.168.56.101/wp-admin"
        self.__app_logout_url = "http://192.168.56.101/wp-login.php?action=logout"
        self.username = "******"
        self.password = "******"
        driver_path = ChromeDriverManager().install()
        self.svc = Service(driver_path)
        self.svc.start()
        try:
            self.driver = webdriver.Remote(self.svc.service_url)
        except Exception:
            # tearDown() is not run when setUp() fails, so the service has
            # to be stopped here to avoid an orphaned chromedriver.
            self.svc.stop()
            raise
        self.waiter = WebDriverWait(self.driver, 30)

    def tearDown(self):
        """Close the browser session and stop chromedriver."""
        print("Cleaning up")
        self.driver.quit()
        self.svc.stop()

    def test_wp_login(self):
        """Fill in the login form, wait for the welcome text, then log out."""
        self.driver.get(self.__app_url)
        self.waiter.until(EC.presence_of_element_located((By.ID, "wp-submit")))

        element = self.waiter.until(
            EC.element_to_be_clickable((By.NAME, "log")))
        element.send_keys(self.username)

        element = self.waiter.until(
            EC.element_to_be_clickable((By.NAME, "pwd")))
        element.send_keys(self.password)

        element = self.waiter.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "input[type *= 'sub']")))
        element.click()

        self.waiter.until(
            EC.presence_of_element_located(
                (By.XPATH, "//div[contains(*//text(), 'Welcome')]")))
        # get() requires a URL; the logout address prepared in setUp() was
        # otherwise never used.
        self.driver.get(self.__app_logout_url)
import os
import sys

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# The chromedriver binary is expected in <project root>/bin.
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(base_dir + '/bin')
driver_path = base_dir + '/bin/chromedriver'

# Start the chromedriver service.
chrome_service = Service(executable_path=driver_path)
chrome_service.command_line_args()
chrome_service.start()

driver = None
try:
    # Start the browser.
    driver = webdriver.Chrome(driver_path)
    driver.implicitly_wait(30)
    driver.maximize_window()

    driver.get("http://www.linkedsee.com/")

    products = driver.find_elements_by_xpath("//ul[@class='drop box-1']/li/a")
    print(products)
    print("found " + str(len(products)) + " products")

    for product in products:
        print(product.get_attribute('textContent'))
finally:
    # Shut everything down even if the page or the lookup failed.
    if driver is not None:
        driver.quit()
    chrome_service.stop()
class Spider:
    """Scrape the NetEase Cloud Music hot list and the comments of each song."""

    def __init__(self):
        """Start chromedriver and a headless Chrome session."""
        self.c_service = Service(
            r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
        )
        self.c_service.command_line_args()
        self.c_service.start()

        chrome_options = Options()
        chrome_options.add_argument('--headless')  # no visible window
        self.driver = webdriver.Chrome(chrome_options=chrome_options)
        self.url = "https://music.163.com/#/discover/toplist?id=3778678"

    def _enter_content_frame(self):
        """Wait for the ``g_iframe`` frame and switch the driver into it."""
        WebDriverWait(self.driver, 3, 0.5).until(
            lambda driver: self.driver.find_element_by_id("g_iframe"))
        self.driver.switch_to.frame(self.driver.find_element_by_id("g_iframe"))

    def find_allSong(self):
        """Print every song of the hot list and scrape its comments."""
        self._enter_content_frame()

        soup = BeautifulSoup(self.driver.page_source, "lxml")
        song_list = soup.find('tbody')  # table body holding all songs
        if song_list is None:
            # The list did not render; there is nothing to iterate over.
            return

        for each_song in song_list.find_all('tr'):
            each_info = each_song.find('span', class_="txt")
            long_time = each_song.find('span', class_="u-dur").text
            author = each_song.find('span',
                                    class_="icn icn-share")['data-res-author']
            link = "https://music.163.com/" + each_info.find('a')['href']
            name = each_info.find('b')['title']
            print('歌曲连接 : {}, 歌曲名 : {},作者 : {},时长 : {}'.format(
                link, name, author, long_time))
            self.comment(link, name)

    def comment(self, link, name):
        """Write all comments of one song to ``<name>-评论信息.csv``.

        The song page is opened in a second tab and paged through with the
        "next" button until that button is missing or disabled.  The tab is
        then closed and the driver returns to the main window.

        Args:
            link: URL of the song page.
            name: Song title, used as the CSV file name prefix.
        """
        self.driver.execute_script("window.open('%s')" % link)
        self.driver.switch_to.window(window_name=self.driver.window_handles[1])
        self._enter_content_frame()

        page = 0
        with open(name + "-评论信息.csv", 'w', encoding="utf-8") as fp:
            fp.write('评论者,评论内容,评论日期' + '\n')
            while True:
                page += 1
                soup = BeautifulSoup(self.driver.page_source, "lxml")
                for each_comment in soup.find_all('div', class_='itm'):
                    comment_name = each_comment.find('a', class_='s-fc7').text
                    tmp = each_comment.find('div', class_='cnt f-brk').text
                    date = each_comment.find('div', class_='time s-fc4').text
                    # The comment text starts with "<author>:"; strip it.
                    comment = tmp.replace(comment_name + ':', '')
                    fp.write(comment_name + ',' + comment + ',' + date + '\n')

                try:
                    print("{}--第{}页.".format(name, page))
                    # "Next" button that is not disabled.
                    next_button = self.driver.find_element_by_xpath(
                        '//*[starts-with(@class,"zbtn znxt") and not(contains(@class,"js-disabled"))]'
                    )
                except Exception:
                    # No usable "next" button: this was the last page.
                    break
                next_button.send_keys(Keys.ENTER)
                time.sleep(0.5)

        # Close the comment tab first and only then return to the main
        # window; closing after the switch would kill the main window and
        # break the next window.open() call.
        print(self.driver.current_window_handle)
        self.driver.close()
        self.driver.switch_to.window(window_name=self.driver.window_handles[0])
        print(self.driver.current_window_handle)

    def run(self):
        """Open the hot list, scrape everything and quit the browser."""
        try:
            self.driver.get(self.url)
            self.find_allSong()
        finally:
            self.driver.quit()

    def __del__(self):
        # __init__ may have failed before the service was created.
        service = getattr(self, 'c_service', None)
        if service is not None:
            service.stop()
class WebDriver(RemoteWebDriver):
    """
    Drives a Chrome browser through a locally started ChromeDriver.

    The ChromeDriver executable must be downloaded separately from
    http://chromedriver.storage.googleapis.com/index.html
    """

    def __init__(self, executable_path="chromedriver", port=0,
                 chrome_options=None, service_args=None,
                 desired_capabilities=None, service_log_path=None):
        """
        Start the ChromeDriver service and open a session against it.

        :Args:
         - executable_path - path to the executable; the default relies on
           the executable being on $PATH
         - port - port for the service; 0 lets the service pick a free one
         - chrome_options - a ChromeOptions instance
         - service_args - passed through to the Service
         - desired_capabilities - dict of non-browser-specific capabilities
           such as "proxy" or "loggingPref"
         - service_log_path - passed to the Service as its log path
        """
        if desired_capabilities is None:
            # No explicit capabilities: derive them from the given options,
            # or from default options when none were given either.
            if chrome_options is None:
                source_options = self.create_options()
            else:
                source_options = chrome_options
            desired_capabilities = source_options.to_capabilities()
        elif chrome_options is not None:
            # Both given: the options are merged into the caller's dict.
            desired_capabilities.update(chrome_options.to_capabilities())

        self.service = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path)
        self.service.start()

        try:
            connection = ChromeRemoteConnection(
                remote_server_addr=self.service.service_url)
            RemoteWebDriver.__init__(
                self,
                command_executor=connection,
                desired_capabilities=desired_capabilities)
        except BaseException:
            # Whatever went wrong, do not leave the service running.
            self.quit()
            raise
        self._is_remote = False

    def launch_app(self, id):
        """Launches Chrome app specified by id."""
        payload = {'id': id}
        return self.execute("launchApp", payload)

    def quit(self):
        """
        Close the browser and stop the ChromeDriver executable that was
        started together with this driver.
        """
        try:
            RemoteWebDriver.quit(self)
        except BaseException:
            # The message is irrelevant; something has probably already
            # gone wrong and the service is stopped regardless.
            pass
        finally:
            self.service.stop()

    def create_options(self):
        """Return a fresh default Options instance."""
        return Options()
def _split_price_range(price_range):
    """Return ``(min_price, max_price)`` for one price label.

    A label containing ``-`` is split at the dash after dropping its last
    character, and ``万`` is appended to both halves.  A label containing
    ``暂无`` yields ``暂无`` twice.  Any other label is used for both values.
    """
    if '-' in price_range:
        bounds = price_range[:-1].split('-')
        return bounds[0] + '万', bounds[1] + '万'
    if '暂无' in price_range:
        return '暂无', '暂无'
    return price_range, price_range


def AutoInfoSpider(url):
    """Scrape brand, model, price range and picture of every listed car.

    The page is walked brand by brand (grouped in 24 letter sections) and,
    per brand, page by page.  Every model is printed and finally written to
    ``auto_info.csv``.

    Args:
        url: Address of the brand overview page.
    """
    # Start the driver service.
    c_service = Service(
        'C:/Users/gusisong/AppData/Local/Continuum/anaconda3/chromedriver.exe')
    c_service.command_line_args()
    c_service.start()

    output = []
    driver = None
    try:
        option = webdriver.ChromeOptions()
        option.add_argument('--headless')
        option.add_argument('--disable-gpu')
        option.add_argument('--disable-images')
        option.add_argument('--disable-javascript')
        option.add_argument('--disable-plugins')
        option.add_argument('--no-sandbox')
        driver = webdriver.Chrome(options=option)
        driver.get(url)

        for letter_index in range(1, 25):
            # Brands listed under this letter section.
            brand_list = driver.find_elements_by_xpath(
                '/html/body/div[8]/div[1]/div[2]/div/div[{}]//a/div'.format(
                    letter_index))
            for brand_index in range(2, len(brand_list) + 2):
                brand = driver.find_element_by_xpath(
                    '/html/body/div[8]/div[1]/div[2]/div/div[{0}]/div[{1}]/a/div'.
                    format(letter_index, brand_index))
                brand_name = brand.text
                # Open the model list of this brand.
                brand.click()

                page_list = driver.find_elements_by_xpath(
                    '/html/body/div[8]/div[4]/div[5]/div/div/div/a')
                for page_index in range(1, len(page_list) + 1):
                    if page_index > 1:
                        driver.find_element_by_xpath(
                            '/html/body/div[8]/div[4]/div[5]/div/div/div/a[{}]'.
                            format(page_index)).click()

                    model_list = driver.find_elements_by_xpath(
                        '/html/body/div[8]/div[4]/div[3]/div')
                    for model_index in range(1, len(model_list) + 1):
                        model_name = driver.find_element_by_xpath(
                            '/html/body/div[8]/div[4]/div[3]/div[{}]/a/p[1]'.
                            format(model_index)).text
                        price_range = driver.find_element_by_xpath(
                            '/html/body/div[8]/div[4]/div[3]/div[{}]/a/p[2]'.
                            format(model_index)).text
                        min_price, max_price = _split_price_range(price_range)
                        pic_link = driver.find_element_by_xpath(
                            '/html/body/div[8]/div[4]/div[3]/div[{}]/a/img'.format(
                                model_index)).get_attribute("src")

                        print(brand_name, model_name, min_price, max_price,
                              pic_link)
                        output.append([
                            brand_name, model_name, min_price, max_price,
                            pic_link
                        ])
    finally:
        # Shut the browser and the driver process down completely, also
        # when scraping aborted half way.
        if driver is not None:
            driver.quit()
        c_service.stop()

    # Passing the columns to the constructor also works for an empty result,
    # where assigning df.columns afterwards raises a length mismatch.
    df = DataFrame(output, columns=['品牌名', '车型', '最低价', '最高价', '图片链接'])
    df.to_csv('auto_info.csv', encoding='utf_8_sig')
class MyWebBoost():
    """Drive headless Chrome through the trial flow of ``target_url``.

    The retry limits are tracked in the module-level counters
    ``maxConnectTime``, ``maxLoadTime`` and ``maxRefreshTime``, which are
    defined outside this class.
    """

    def __init__(self, chromedriver_path, target_url):
        """Start headless Chrome using the chromedriver at the given path."""
        chromedriver = chromedriver_path
        chrome_options = Options()
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        self.s = Service(chromedriver)
        self.driver = webdriver.Chrome(service=self.s, options=chrome_options)
        # Set both timeouts as a double safeguard.
        self.driver.set_page_load_timeout(30)
        self.driver.set_script_timeout(30)
        self.target_url = target_url

    # 0. Start connecting.
    def startConnect(self):
        """Load the target URL, retrying recursively, then start the work."""
        global maxConnectTime
        # Give up and quit once the counter reaches 3.
        if maxConnectTime == 3:
            self.quit()
            return
        isSuccess = self.getUrl()
        # The page loaded.
        if isSuccess:
            self.startMyWork()
        # Loading failed: count the attempt and retry recursively.
        else:
            maxConnectTime = maxConnectTime + 1
            time.sleep(1)
            self.startConnect()

    # 1. Try to open the URL.
    def getUrl(self):
        """Load ``self.target_url``; return True on success, False on error."""
        try:
            self.driver.get(self.target_url)
            return True
        except Exception as e:
            logging.error(
                "------------ts.js.vnet.cn 首次加载失败!------------")
            return False

    # 2. Run the workflow.
    def startMyWork(self):
        """Run ``startClick`` and retry after the reported delay on failure."""
        global maxLoadTime
        # Give up and quit once the counter reaches 6.
        if maxLoadTime == 6:
            self.quit()
            return
        # Start the click sequence.
        # NOTE(review): startClick() can fall off its end and return None
        # (see its last else branch); this unpacking then raises TypeError.
        flag, d_time = self.startClick()
        if flag:
            logging.error("操作成功!")
            self.quit()
            # senWChat("提速成功!")
            return
        else:
            # Failed because the time does not match yet: sleep, then retry.
            maxLoadTime = maxLoadTime + 1
            logging.error("需要休眠:%s秒" % str(d_time))
            time.sleep(d_time + 5)
            # Reload the page.
            if self.getUrl():
                self.startMyWork()
            else:
                self.quit()
                return

    # 3. Click handling.
    def startClick(self):
        """Click through the agreement/trial buttons.

        Returns ``(True, 0)`` when the confirmation button could be clicked
        and ``(False, delay_seconds)`` when the page reports an expiry time
        instead.  Other paths end without a return statement.
        """
        global maxRefreshTime
        try:
            self.driver.get_screenshot_as_file('/root/startClick.png')
            # 1. Tick the agreement checkbox.
            readBtn = self.driver.find_elements(By.XPATH, '//*[@id="CheckAgree"]')[0]
            if readBtn.is_displayed():
                readBtn.click()
                time.sleep(1)
                self.driver.find_elements(By.XPATH, '//*[@id="ButtonAgree"]')[0].click()
                time.sleep(3)
                self.driver.find_elements(By.XPATH, '//div[@id="CmdBtn"]/div/input[2]')[0].click()
                time.sleep(5)
            # 2. No agreement shown: click the trial button directly.
            else:
                self.driver.find_elements(By.XPATH, '//div[@id="CmdBtn"]/div[1]/input[2]')[0].click()
                time.sleep(5)
        except Exception as e:
            maxRefreshTime = maxRefreshTime + 1
            # NOTE(review): none of these branches returns, so execution
            # continues with the second try block below even after quit().
            if maxRefreshTime == 8:
                self.quit()
            else:
                if self.getUrl():
                    self.startMyWork()
                else:
                    self.quit()
        try:
            self.driver.get_screenshot_as_file('/root/clicked_try.png')
            # The trial has expired: click the confirmation button.
            time.sleep(5)
            self.driver.find_element(By.XPATH, '//input[@id="btnExperiencesOK"]').click()
            return True, 0
        except Exception as e:
            self.driver.get_screenshot_as_file('/root/clicked_fail.png')
            # Not expired yet: read the message that states the expiry time.
            text = self.driver.find_element(By.XPATH, '//p[@id="OpenResMessage"]').text
            # Example: text = "亲,您今天的下行体验时间将于00点08分到期"
            logging.error(text)
            if len(text) > 0:
                # TODO
                delayTime = self.getDelayTime(text)
                return False, delayTime
            else:
                maxRefreshTime = maxRefreshTime + 1
                # NOTE(review): this branch ends without a return value.
                if maxRefreshTime == 10:
                    self.quit()
                else:
                    if self.getUrl():
                        self.startMyWork()
                    else:
                        self.quit()

    # Compute the delay in seconds.
    @staticmethod
    def getDelayTime(text):
        """Return the seconds between now and the expiry time named in text.

        The time is taken from between ``将于`` and ``分到期``, combined with
        today's date, and shifted by 240 seconds.
        """
        logging.info("提醒时间:" + text)
        endTime = text.split('将于')[1].split("分到期")[0].replace('点', ':')
        nowDay = time.strftime("%Y-%m-%d ", time.localtime())
        # Full expiry timestamp for today.
        finalTime = nowDay + endTime + ":59"
        # Current timestamp.
        nowTime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        d1 = datetime.datetime.strptime(nowTime, '%Y-%m-%d %H:%M:%S')
        d2 = datetime.datetime.strptime(finalTime, '%Y-%m-%d %H:%M:%S')
        # The server clock is said to run about 200 seconds behind real time;
        # 240 seconds are added here.
        d2 = d2 + datetime.timedelta(seconds=240)
        # Take the absolute difference, whichever timestamp is later.
        if d1 > d2:
            differSecond = (d1 - d2).seconds
        else:
            differSecond = (d2 - d1).seconds
        logging.info("当前的时间:" + nowTime)
        logging.info("到期的时间:" + finalTime)
        # return 150 if differSecond < 130 else differSecond
        return differSecond

    # Quit chromedriver.
    def quit(self):
        """Quit the driver, stop the service and kill leftover processes.

        The two shell commands send ``kill -9`` to every process whose
        ``ps -ef`` line matches ``chromedriver`` or ``chrome``, not only the
        ones started by this object.
        """
        self.driver.quit()
        self.s.stop()
        os.system('ps -ef|grep chromedriver|grep -v grep|awk \'{print $2}\'|xargs kill -9')
        os.system('ps -ef|grep chrome|grep -v grep|awk \'{print $2}\'|xargs kill -9')
class CaesarReaderWindow(QMainWindow, Ui_myMainWindow):
    """Main window that reads articles in Chrome and re-dials PPPoE."""

    def __init__(self, parent=None):
        super(CaesarReaderWindow, self).__init__(parent)
        self.chromeDriverPath = os.path.abspath('chromedriver.exe')
        self.driver = None
        self.c_service = None
        self.startReadFlag = False
        self.stopReadFlag = False
        self.stopPPPOEFlag = False
        # Number of articles read so far.
        self.readNum = 0
        # Number of articles still unread.
        self.unReadNum = 0
        self.driverThread = DriverThread(self)
        self.pppoeThread = PPPOETask(self)
        # Connect the worker signals exactly once; connecting on every
        # button click would add one more slot invocation per click.
        self.driverThread.trigger.connect(self.reconnect_pppoe)
        self.pppoeThread.trigger.connect(self.read_next)
        self.setupUi(self)
        self.connectPppoeBtn.clicked.connect(self.connect_pppoe_click)
        self.disconnectPppoeBtn.clicked.connect(self.disconnect_pppoe_click)
        self.saveConfigBtn.clicked.connect(self.save_config_click)
        self.startReadBtn.clicked.connect(self.start_read_click)
        self.stopReadBtn.clicked.connect(self.end_read_click)
        self.init_config()

    def init_config(self):
        """Load the global configuration on the global thread pool."""
        try:
            initConfigTask = InitConfigTask(self)
            pool = QThreadPool.globalInstance()
            pool.start(initConfigTask)
        except Exception as e:
            self.print_log(str(e))

    def popen(self, cmd):
        """Run ``cmd`` and return its stdout lines decoded as GBK.

        Returns:
            list of str on success, ``-1`` when the command could not be run
            or its output could not be decoded.
        """
        try:
            # Read before waiting: waiting first can block forever once the
            # child has filled the pipe buffer.  Leaving the with block
            # closes the pipe and waits for the process.
            with subprocess.Popen(cmd, stdout=subprocess.PIPE) as process:
                lines = process.stdout.readlines()
            return [line.decode('gbk') for line in lines]
        except Exception:
            self.print_log("获取ip异常")
            return -1

    def save_config_click(self):
        """Write the settings to ``config.ini`` if all fields are filled."""
        configObj = {
            'startTime': self.startTimeEdit.text(),
            'endTime': self.endTimeEdit.text(),
            'pauseTimeFrom': self.pauseTimeFromEdit.text(),
            'pauseTimeTo': self.pauseTimeToEdit.text(),
            'slipTimesFrom': self.slipTimesFromEdit.text(),
            'slipTimesTo': self.slipTimesToEdit.text(),
            'pxFrom': self.pxFromEdit.text(),
            'pxTo': self.pxToEdit.text(),
            'chromeLocation': self.chromeLocationEdit.text()
        }
        if not all(configObj.values()):
            # At least one field is empty; nothing is saved.
            return
        with open(os.path.abspath('config.ini'), 'w', encoding='utf-8') as f:
            f.write(json.dumps(configObj))

    def start_read_click(self):
        """Handle the "start reading" button: start the driver thread."""
        self.startReadFlag = True
        self.stopReadFlag = False
        try:
            if not self.driverThread.isRunning():
                self.driverThread.start()
        except Exception as e:
            self.print_log(str(e))

    def _shutdown_driver(self):
        """Quit the browser and stop the driver service, best effort.

        Errors are ignored on purpose: the stop button and the reader thread
        may both shut the same objects down.
        """
        try:
            if self.driver is not None:
                self.driver.quit()
                self.driver = None
        except Exception:
            pass
        try:
            if self.c_service is not None:
                self.c_service.stop()
                self.c_service = None
        except Exception:
            pass

    def end_read_click(self):
        """Handle the "stop reading" button: stop and close the browser."""
        self.startReadFlag = False
        self.stopReadFlag = True
        self._shutdown_driver()

    def connect_pppoe_click(self):
        """Handle the "dial" button: start the PPPoE thread."""
        self.stopPPPOEFlag = False
        try:
            if not self.pppoeThread.isRunning():
                self.pppoeThread.start()
        except Exception as e:
            self.print_log(str(e))

    def _disconnect_all(self, connections):
        """Hang up every connection in ``connections`` and log the result."""
        for p in connections:
            self.show_ip_address()
            if self.disconnect_pppoe(p[0]) == "success":
                self.print_log("宽带%s已经断开" % p[1])
                sleep(5)

    def disconnect_pppoe_click(self):
        """Handle the "hang up" button: disconnect all active connections."""
        self.stopPPPOEFlag = True
        data = self.check_for_broadband()
        if data is not None:
            self._disconnect_all(data)

    def read_next(self):
        """Start reading the next article unless reading was stopped."""
        if not self.startReadFlag:
            return
        if self.stopReadFlag or self.stopPPPOEFlag:
            return
        self.start_read_click()

    def reconnect_pppoe(self):
        """Re-dial PPPoE unless reading or dialing was stopped."""
        if self.stopReadFlag or self.stopPPPOEFlag:
            return
        self.connect_pppoe_click()

    def build_driver(self, url):
        """Open ``url`` in an emulated mobile Chrome and scroll through it.

        The number of scrolls, the pause before each scroll and the scroll
        distance are drawn at random from the ranges entered in the UI.  On
        completion the first line of ``unread.txt`` is removed and ``url``
        is appended to ``read.txt``.

        Args:
            url: Article address; a falsy value means nothing is left.
        """
        if not url:
            self.stopReadFlag = True
            self.print_log("已全部阅读完成\n")
            return

        self.readNum = self.readNum + 1
        self.print_log("开始阅读第 %d 篇,剩余 %d 篇" % (self.readNum, self.unReadNum))
        self.print_log("当前:%s" % url)

        self.c_service = Service(self.chromeDriverPath)
        self.c_service.command_line_args()
        self.c_service.start()

        hasNum = 0
        try:
            mobileEmulation = {
                "deviceMetrics": {
                    "width": 320,
                    "height": 640,
                    "pixelRatio": 3.0
                },
                "userAgent":
                'Mozilla/5.0 (Linux; Android 4.1.1; GT-N7100 Build/JRO03C) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/35.0.1916.138 Mobile Safari/537.36 T7/6.3'
            }
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--window-size=250,640')
            chrome_options.add_argument('--disable-gpu')
            chrome_options.add_argument('--hide-scrollbars')
            chrome_options.add_argument('--disable-javascript')
            chrome_options.add_argument('--log-level=3')
            chrome_options.binary_location = self.chromeLocationEdit.text()
            chrome_options.add_experimental_option('mobileEmulation',
                                                   mobileEmulation)
            chrome_options.add_experimental_option("excludeSwitches",
                                                   ['enable-automation'])
            chrome_options.add_experimental_option('w3c', False)
            self.driver = webdriver.Chrome(options=chrome_options)
            self.driver.get(url)

            # Number of scrolls for this article.
            num = random.randint(int(self.slipTimesFromEdit.text()),
                                 int(self.slipTimesToEdit.text()))
            for n in range(num):
                if self.stopReadFlag:
                    break
                holdTime = random.randint(int(self.pauseTimeFromEdit.text()),
                                          int(self.pauseTimeToEdit.text()))
                px = random.randint(int(self.pxFromEdit.text()),
                                    int(self.pxToEdit.text()))
                self.print_log("第 %d 次下滑,等待 %d 秒, 下滑 %d 像素" %
                               (n + 1, holdTime, px))
                # Pause before every scroll.
                sleep(holdTime)
                action = TouchActions(self.driver)
                # Scroll by the distance that was just drawn and logged.
                action.scroll(0, px).perform()
                hasNum = hasNum + 1
        finally:
            # Close browser and service whether reading finished, was
            # stopped, or failed.
            self._shutdown_driver()

        if self.stopReadFlag:
            self.print_log("第 %d 篇阅未完成,共下滑 %d 次\n" % (self.readNum, hasNum))
            return

        self.print_log("第 %d 篇阅读完成,共下滑 %d 次\n" % (self.readNum, hasNum))
        try:
            unread_path = os.path.abspath('unread.txt')
            # Drop the first line of the unread list.
            with open(unread_path, 'r', encoding='utf-8') as f:
                content = f.readlines()
            with open(unread_path, 'w+', encoding='utf-8') as f:
                f.writelines(content[1:])
            # Append the article to the read list.
            with open(os.path.abspath('read.txt'), 'a', encoding='utf-8') as f:
                f.write(url)
        except Exception as e:
            print(e)

    def build_pppoe(self):
        """Hang up the active connections, or dial when none is active."""
        data = self.check_for_broadband()
        if data is not None:
            self._disconnect_all(data)
        else:
            try:
                pid, res = self.dial_broadband()
                if res == 0:
                    self.show_ip_address()
                    sleep(5)
            except Exception:
                pass

    def print_log(self, message):
        """Queue ``message`` for asynchronous display in the log view."""
        try:
            logTask = LogTask(self, message)
            pool = QThreadPool.globalInstance()
            pool.start(logTask)
        except Exception as msg:
            print(msg)

    def setUnReadNum(self, num):
        """Store the number of unread articles."""
        self.unReadNum = num

    def connect_pppoe(self, dialname, account, passwd):
        """Dial ``dialname`` with the given credentials via win32ras."""
        dial_params = (dialname, '', '', account, passwd, '')
        return win32ras.Dial(None, None, dial_params, None)

    def dial_broadband(self):
        """Dial the broadband connection, retrying every 3 s until stopped.

        Returns:
            tuple: ``(handle, 0)`` on success, ``(-1, -1)`` when dialing
            failed after a stop request or raised an exception.
        """
        # Loop instead of recursing so long outages cannot exhaust the
        # recursion limit; each round repeats the same steps.
        while True:
            self.pppoeStatusLbl.setText("正在拨号...")
            dialname = '宽带连接'  # just a name
            account = self.accountEdit.text()
            passwd = self.passwordEdit.text()
            self.print_log("正在拨号")
            try:
                # handle identifies the connection for later hang-up;
                # result is 0 when the connection was established.
                handle, result = self.connect_pppoe(dialname, account, passwd)
                if result == 0:
                    self.print_log("拨号成功")
                    self.pppoeStatusLbl.setText("拨号成功")
                    return handle, result
                if self.stopPPPOEFlag:
                    self.print_log("拨号失败")
                    self.pppoeStatusLbl.setText("拨号失败")
                    return -1, -1
                self.print_log("拨号失败,3秒后重试")
                self.pppoeStatusLbl.setText("正在重试")
                sleep(3)
            except Exception as e:
                self.print_log("拨号异常" + str(e))
                return -1, -1

    def disconnect_pppoe(self, handle):
        """Hang up ``handle``, retrying every 3 s until it succeeds.

        Returns:
            str: ``"success"`` once hung up, ``"fail"`` when handle is None.
        """
        # Loop instead of recursing; each round repeats the same steps.
        while True:
            self.print_log("正在断开宽带!")
            self.pppoeStatusLbl.setText("正在断开")
            if handle is None:
                self.print_log("宽带断开异常")
                self.pppoeStatusLbl.setText("断开失败")
                return "fail"
            try:
                win32ras.HangUp(handle)
                self.print_log("宽带断开成功!")
                self.pppoeStatusLbl.setText("断开成功")
                return "success"
            except Exception:
                self.print_log("宽带断开失败,3秒后重试")
                self.pppoeStatusLbl.setText("断开失败")
                sleep(3)

    def check_for_broadband(self):
        """Return the active RAS connections, or None when there are none."""
        connections = win32ras.EnumConnections()
        if len(connections) == 0:
            self.print_log("系统未运行任何宽带连接")
            return
        self.print_log("系统正在运行%d个宽带连接" % len(connections))
        return connections

    def show_ip_address(self):
        """Show the IPv4 address of the broadband adapter from ``ipconfig``."""
        self.print_log("正在查询IP")
        self.pppoeIpLbl.setText("")
        ipconfig_result_list = self.popen('ipconfig')
        if ipconfig_result_list == -1:
            return
        ip_str = None
        have_ppp = 0
        for line in ipconfig_result_list:
            # Start looking for the address once the adapter section began.
            if line.find("宽带连接") >= 0:
                have_ppp = 1
            if have_ppp == 1:
                if line.strip().startswith("IPv4 地址"):
                    ip_str = line.split(":")[1].strip()
                    have_ppp = 0
        if ip_str is not None:
            self.print_log("IP地址为: " + ip_str)
            self.pppoeIpLbl.setText(ip_str)
class ICloud(object):
    """Drive the iCloud web app at https://www.icloud.com/#fmf and store locations.

    One instance owns a chromedriver ``Service``, a Selenium Chrome browser and
    a pychrome connection to the same browser. After a successful login
    (``run_login`` followed by ``run_codes``) the instance listens to the
    browser's network traffic; every ``refreshClient`` response is parsed and
    each reported location is handed to ``save_model``.
    """

    TIMEOUT = 100  # seconds; used for the page-load timeout and explicit waits
    id = None
    account = None
    browser = None
    chrome = None
    wait = None
    tab = None
    deleted = False
    # Class-level set, so it is shared by every instance: uids already checked
    # against (or added to) the YingYan entity list.
    mapping = set()
    c_service = None

    def __init__(self):
        """Start the chromedriver service and open a browser."""
        gc.collect()
        self.id = random.randint(0, 100)
        self.c_service = Service('chromedriver')
        self.c_service.command_line_args()
        self.c_service.start()
        self.start_browser()
        logger.info('Start a browser.')

    def __wait_for_visible(self, xpath):
        """Block until the element at *xpath* is visible and return it.

        Raises ``TimeoutException`` after ``TIMEOUT`` seconds.
        """
        return self.wait.until(
            expected_conditions.visibility_of_element_located(
                (By.XPATH, xpath)))

    def _start_network_listener(self):
        """Attach ``response_received`` to the newest tab and schedule auto refresh."""
        logger.info('Start network listening...')
        self.tab = self.chrome.list_tab()[-1]
        self.tab.Network.responseReceived = self.response_received
        self.tab.start()
        self.tab.Network.enable()
        # Start auto refresh.
        Timer(60, self.auto_refresh).start()

    def run_login(self, account, password):
        """Submit *account* and *password* on the login page.

        :return: True when the verification-code inputs become visible,
                 False when waiting for them times out (login failed).
        """
        logger.info('"{account}" is logging in.'.format(account=account))
        self.account = account

        # Get the login page.
        self.browser.get('https://www.icloud.com/#fmf')
        auth_frame = self.__wait_for_visible('//*[@id="auth-frame"]')
        self.browser.switch_to.frame(auth_frame)
        logger.info('Login page is loaded.')

        # Process input: account name and password.
        remember_me_input = self.browser.find_element_by_xpath(
            '//*[@id="remember-me"]')
        remember_me_input.click()
        account_name_text_field = self.browser.find_element_by_xpath(
            '//*[@id="account_name_text_field"]')
        account_name_text_field.send_keys(account)
        account_name_text_field.send_keys(Keys.RETURN)
        password_text_field = self.__wait_for_visible(
            '//*[@id="password_text_field"]')
        password_text_field.send_keys(password)
        password_text_field.send_keys(Keys.RETURN)

        try:
            # Wait until the code controls are visible.
            self.__wait_for_visible('//*[@id="char0"]')
            return True
        except TimeoutException:
            # Login failed.
            return False

    def run_codes(self, codes):
        """Type the six verification characters and trust the browser.

        :param codes: indexable with at least six entries (one per input box).
        :return: True when the "trust browser" button appeared and network
                 listening was started, False when waiting for it timed out.
        """
        # Write codes to each controls.
        for i in range(6):
            char = self.browser.find_element_by_xpath(
                '//*[@id="char{i}"]'.format(i=i))
            char.send_keys(codes[i])

        try:
            # Click trust button
            trust_browser = self.__wait_for_visible(
                '//*[starts-with(@id, "trust-browser-")]')
            trust_browser.click()
        except TimeoutException:
            # Codes were incorrect.
            return False

        self.save_cookies()
        self._start_network_listener()
        return True

    def save_cookies(self):
        """Dump the browser cookies as JSON to ``./logs/<id>.cookies``."""
        cookies = self.browser.get_cookies()
        with open('./logs/{id}.cookies'.format(id=self.id), 'w') as f:
            f.write(json.dumps(cookies))

    def load_cookies(self):
        """Replace the browser cookies with those saved by ``save_cookies``."""
        self.browser.delete_all_cookies()
        with open('./logs/{id}.cookies'.format(id=self.id), 'r') as f:
            saved_cookies = json.loads(f.read())

        for cookie in saved_cookies:
            self.browser.add_cookie({
                'domain': cookie['domain'],
                'name': cookie['name'],
                'value': cookie['value'],
                'path': '/',
                'expires': None
            })
        logger.info('Browser cookies loaded.')

    def start_browser(self):
        """Launch Chrome and connect pychrome to its DevTools endpoint.

        The DevTools URL is read back from the verbose chromedriver log.
        Raises ``Exception('Invalid protocol url.')`` when the log does not
        contain it, and re-raises ``ConnectionResetError`` after ten failed
        launch attempts.
        """
        # Start chromedriver
        options = webdriver.ChromeOptions()
        options.add_argument('--disable-background-networking=false')
        options.add_argument('--no-sandbox')
        log_path = './logs/{id}.log'.format(id=self.id)

        retry_count = 0
        while True:
            try:
                self.browser = webdriver.Chrome(
                    chrome_options=options,
                    service_args=['--verbose', '--log-path=' + log_path])
                break
            except ConnectionResetError:
                retry_count += 1
                if retry_count >= 10:
                    raise

        self.browser.set_page_load_timeout(self.TIMEOUT)
        self.wait = WebDriverWait(self.browser, self.TIMEOUT)

        # Get debug url
        url = None
        with open(log_path, 'r') as log:
            for line in log:
                if 'DevTools request: http://localhost' in line:
                    url = line[line.index('http'):].replace(
                        '/json/version', '').strip()
                    break
        if not url:
            raise Exception('Invalid protocol url.')

        # Start pychrome
        self.chrome = pychrome.Browser(url=url)

    def restart_browser(self):
        """Discard the current browser, start a new one and restore the session.

        Re-raises the last error when the page cannot be reloaded with the
        saved cookies within five attempts.
        """
        try:
            if self.browser:
                self.browser.close()
                self.browser.quit()
        except Exception:
            # Best effort: the old browser may already be dead.
            pass

        self.start_browser()
        logger.info('Browser restarted.')

        retry = 0
        while True:
            try:
                self.browser.get('https://www.icloud.com/#fmf')
                self.load_cookies()
                self.browser.refresh()
                break
            except Exception:
                retry += 1
                if retry >= 5:
                    raise

        self._start_network_listener()

    def auto_refresh(self):
        """Click through the friend list, then reschedule itself in 60 seconds.

        On a ``WebDriverException`` the error is logged and ``refresh_page``
        takes over.
        """
        if self.deleted:
            return
        try:
            self.browser.switch_to.default_content()
            frame = self.__wait_for_visible('//*[@id="fmf"]')
            self.browser.switch_to.frame(frame)
            nearby = self.__wait_for_visible(
                '/html/body/div[2]/div/div/div[2]/div[1]/div/div[3]/div[1]/div[1]'
            )
            friends = self.browser.find_elements_by_xpath(
                '/html/body/div[2]/div/div/div[2]/div[1]/div/div[3]/div[1]/div[not(contains(@class, "nearby"))]'
            )
            for friend in friends:
                friend.click()
            nearby.click()
            self.save_cookies()
            Timer(60, self.auto_refresh).start()
        except WebDriverException as e:
            logger.error(e.args)
            self.refresh_page()

    def refresh_page(self, retry=1):
        """Reload the page; after five failed attempts restart the browser.

        :param retry: attempt counter, incremented on every failed reload.
        """
        if self.deleted:
            return

        if retry >= 5:
            logger.error('SERVICE DOWN! restarting...')
            try:
                self.restart_browser()
            except Exception as e:
                # pop() with a default so a missing entry cannot prevent the
                # alert mail from being sent.
                ICLOUD_DICT.pop(self.account, None)
                app.mail.send(
                    'FMF: SERVICE DOWN',
                    '<p>{account} unavailable, login again.</p><p>{e}</p>'.
                    format(account=self.account, e=e.args),
                    img='logs/{id}.png'.format(id=self.id))
            # restart_browser() already scheduled the next auto refresh, so
            # stop here instead of refreshing and starting a second timer.
            return

        logger.info('REFRESHING...')
        try:
            self.browser.save_screenshot('logs/{id}.png'.format(id=self.id))
            self.browser.refresh()
            Timer(60, self.auto_refresh).start()
        except Exception as e:
            logger.error(e.args)
            retry += 1
            Timer(10, self.refresh_page, [retry]).start()

    def response_received(self, **kwargs):
        """Network.responseReceived callback: persist locations from refreshClient.

        Responses whose URL does not contain ``refreshClient`` and bodies
        that cannot be fetched are ignored.
        """
        response = kwargs.get('response') or {}
        request_id = kwargs.get('requestId')
        if 'refreshClient' not in (response.get('url') or ''):
            return

        try:
            content = self.tab.Network.getResponseBody(
                requestId=request_id)['body']
        except pychrome.CallMethodException:
            return
        logger.info('{request_id}: {content}'.format(request_id=request_id,
                                                     content=content))

        obj = json.loads(content)
        if 'locations' not in obj:
            return

        # Map contact id -> display name.
        contacts = {}
        for contact in obj['contactDetails']:
            contacts[contact['id']] = '{first} {middle} {last}'.format(
                first=contact['firstName'],
                middle=contact['middleName'],
                last=contact['lastName']).strip()

        for loc in obj['locations']:
            location = loc['location']
            if location is None:
                continue
            uid = loc['id']

            raw_address = location['address']
            if raw_address is None:
                address = 'UNKNOWN'
            elif 'formattedAddressLines' in raw_address:
                address = ' '.join(raw_address['formattedAddressLines'])
            else:
                address = '{streetAddress} {locality} {administrativeArea}'.format(
                    streetAddress=raw_address['streetAddress'],
                    locality=raw_address['locality'],
                    administrativeArea=raw_address['administrativeArea'])

            self.save_model({
                'locid': location['locationId'],
                'account': self.account,
                'uid': uid,
                'name': contacts[uid],
                # The payload timestamp is divided by 1000 before being
                # passed to datetime.fromtimestamp() in save_model.
                'time': location['timestamp'] / 1000.0,
                'accuracy': location['horizontalAccuracy'],
                'latitude': location['latitude'],
                'longitude': location['longitude'],
                'address': address
            })

    def save_model(self, obj):
        """Store one location locally and push it to the YingYan track API.

        The YingYan entity for ``obj['uid']`` is looked up (and added when the
        lookup status is non-zero) once per uid; a location whose ``locid`` is
        already stored is skipped entirely.
        """
        if obj['uid'] not in self.mapping:
            res = requests.get('http://yingyan.baidu.com/api/v3/entity/list',
                               params={
                                   'ak': BMAP_AK,
                                   'service_id': YINGYAN_ID,
                                   'filter': 'entity_names:{uid}'.format(uid=obj['uid'])
                               })
            jo = json.loads(res.text)
            if jo['status'] != 0:
                res = requests.post(
                    'http://yingyan.baidu.com/api/v3/entity/add',
                    data={
                        'ak': BMAP_AK,
                        'service_id': YINGYAN_ID,
                        'entity_name': obj['uid'],
                        'entity_desc': obj['name']
                    })
                logger.info('YingYan ADD entity: {res}'.format(res=res.text))
            self.mapping.add(obj['uid'])

        if Location.objects.filter(locid=obj['locid']):
            return

        try:
            Location.objects.create(locid=obj['locid'],
                                    account=obj['account'],
                                    uid=obj['uid'],
                                    name=obj['name'],
                                    time=datetime.datetime.fromtimestamp(
                                        obj['time']),
                                    accuracy=obj['accuracy'],
                                    latitude=obj['latitude'],
                                    longitude=obj['longitude'],
                                    address=obj['address'])
        except IntegrityError:
            # Another writer stored the same location first.
            pass

        res = requests.post('http://yingyan.baidu.com/api/v3/track/addpoint',
                            data={
                                'ak': BMAP_AK,
                                'service_id': YINGYAN_ID,
                                'entity_name': obj['uid'],
                                'latitude': obj['latitude'],
                                'longitude': obj['longitude'],
                                'loc_time': int(obj['time']),
                                'radius': obj['accuracy'],
                                'coord_type_input': 'wgs84',
                                'address': obj['address']
                            })
        logger.info('YingYan ADD point: {res}'.format(res=res.text))

    def __del__(self):
        """Stop the timers' work and release the browser and driver service.

        Safe to call on a partially initialised instance and more than once:
        the service is stopped even when quitting the browser fails.
        """
        self.deleted = True
        try:
            if self.browser:
                self.browser.quit()
        except Exception as e:
            logger.error(e.args)
        finally:
            self.browser = None
            if self.c_service:
                self.c_service.stop()
                self.c_service = None
        logger.info('A browser is Closed.')
user32.TranslateMessage(ctypes.byref(msg)) user32.DispatchMessageA(ctypes.byref(msg)) finally: del msg user32.UnregisterHotKey(None, 98) user32.UnregisterHotKey(None, 99) if __name__ == "__main__": try: c_service = Service("chromedriver.exe") c_service.command_line_args() c_service.start() except WebDriverException as e: logging.error("Failed to start broswer service") try: init_log_config() thread_hotKey = HotKey("thread_hotKey") thread_hotKey.setDaemon(True) thread_hotKey.start() print("The thread listening event from keyboard is running...") driver = Driver() driver.run() except KeyboardInterrupt as e: logging.warning("Handle KeyboardInterrupt.") c_service.stop() os.sys.exit(1)
class ChromeDriverAdapter:
    """Own one chromedriver service and create Chrome sessions against it."""

    def __init__(self, console: Console) -> None:
        """Start the bundled chromedriver found under ``bin/<platform.system()>/``.

        :param console: object whose ``exception()`` method is called to
            report a failure inside ``getNewInstance``.
        """
        self.console = console
        self.service = Service(
            (os.path.dirname(__file__) or '.') + ('/bin/%s/chromedriver' % system()))
        self.drivers = []
        self.tempfolder = []
        self.service.start()

    def purge(self) -> None:
        """Quit every driver in ``self.drivers``, delete temp profiles, stop the service."""
        try:
            for driver in self.drivers:
                try:
                    driver.quit()
                except Exception:
                    # Best effort: a driver may already be closed.
                    pass
            for folder in self.tempfolder:
                shutil.rmtree(path=folder, ignore_errors=True)
        finally:
            # Always stop chromedriver, even if the cleanup above is interrupted.
            self.service.stop()

    def getNewInstance(self, uid, user, proxy=None, headless=False) -> WebDriver:
        """Create a new Chrome session configured from *user*.

        :param uid: not used by this method.
        :param user: mapping-like settings; the optional keys read here are
            ``proxy``, ``windowSize`` and ``userAgent``.
        :param proxy: proxy passed to ``buildChromeExtension``; falls back to
            ``user['proxy']`` when omitted.
        :param headless: add ``--headless`` when true.
        :return: the new driver, or ``None`` when anything failed (the error
            is reported through ``self.console.exception()``).
        """
        pluginfile = None
        driver = None
        try:
            if proxy is None and 'proxy' in user:
                proxy = user['proxy']

            options = Options()
            for argument in (
                    '--no-sandbox',
                    "--disable-dev-shm-usage",
                    'media.eme.enabled',
                    "--disable-gpu",
                    '--disable-popup-blocking',
                    "--disable-blink-features",
                    "--disable-blink-features=AutomationControlled",
                    '--ignore-certificate-errors-spki-list',
                    '--ignore-certificate-errors',
                    '--ignore-ssl-errors',
            ):
                options.add_argument(argument)
            options.add_experimental_option("excludeSwitches", ["enable-automation"])
            options.add_experimental_option('useAutomationExtension', False)

            # Every session gets its own throw-away profile; purge() removes it.
            userDataDir = mkdtemp()
            self.tempfolder.append(userDataDir)
            options.add_argument('--user-data-dir=%s' % userDataDir)

            if 'windowSize' in user:
                options.add_argument("--window-size=%s" % user['windowSize'])
            if headless:
                options.add_argument("--headless")
            # Set user agent if available
            if 'userAgent' in user:
                options.add_argument('user-agent=%s' % user['userAgent'])
            # incognito argument disable the use of the proxy, DO NOT SET !
            # options.add_argument("--incognito")

            desired_capabilities = DesiredCapabilities.CHROME.copy()

            # add a proxy if available
            if proxy:
                pluginfile = self.buildChromeExtension(proxy)
                options.add_extension(pluginfile)

            # Instantiate the driver
            driver = webdriver.Remote(self.service.service_url,
                                      desired_capabilities=desired_capabilities,
                                      options=options)

            # Make webdriver = undefined
            script = '''
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            })
            '''
            driver.execute_script(script)
            return driver
        except Exception:
            self.console.exception()
            if driver is not None:
                # The session was created but could not be prepared; do not
                # leave an unreachable browser running.
                try:
                    driver.quit()
                except Exception:
                    pass
            return None
        finally:
            if pluginfile:
                try:
                    os.remove(pluginfile)
                except OSError:
                    pass
class Meeting:
    """
    * Stores all the meeting data and houses the meeting functions
    """

    def __init__(self, meeting_url):
        """
        * Stores the credentials, the meeting URL and a chromedriver service
        * bound to a randomly chosen port (the service is not started here)
        """
        super().__init__()
        self.email = "*****@*****.**"
        self.password = "******"
        self.sleep_time = 12
        self.meeting_url = meeting_url
        self.port = random.randint(1024, 65535)
        self.service = Service("chromedriver", port=self.port)

    def init_driver(self):
        """
        * Starts the service, then initialises a headless Chrome webdriver
        ! Raises KeyError if the GOOGLE_CHROME_BIN environment variable is unset
        ! Stops the service again and re-raises if the session cannot be created
        """
        options = webdriver.ChromeOptions()
        options.headless = True
        options.binary_location = os.environ["GOOGLE_CHROME_BIN"]
        options.add_argument('--disable-gpu')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')

        self.service.start()
        try:
            self.driver = webdriver.Remote(
                command_executor="http://127.0.0.1:{}".format(str(self.port)),
                options=options,
            )
        except Exception:
            # Do not leave an orphaned chromedriver process behind.
            self.service.stop()
            raise
        self.driver.implicitly_wait(self.sleep_time)
        self.driver.set_window_size(3840, 2160)

    def try_to_join(self):
        """
        * Opens the meeting URL, follows the "join from browser" link, signs in
        * and joins under a random 16-character display name
        ! Raises a webdriver exception if an expected page element is missing
        """
        self.driver.get(self.meeting_url)
        join_from_browser_link = self.driver.find_element_by_xpath(
            "//div[@class='desc24 webclient hideme']//a").get_attribute("href")
        self.driver.get(join_from_browser_link)

        self.driver.find_element_by_id("email").send_keys(self.email)
        self.driver.find_element_by_id("password").send_keys(self.password)
        self.driver.find_element_by_xpath("//div[@class='signin']").click()

        self.random_hash = ''.join(
            random.choices(
                string.ascii_letters + string.digits,
                k=16,
            ))
        self.driver.find_element_by_id("inputname").clear()
        self.driver.find_element_by_id("inputname").send_keys(self.random_hash)
        self.driver.find_element_by_id("joinBtn").click()

    def get_participants_list(self):
        """
        * Opens the participants panel and stores each participant's
        * aria-label, split on whitespace, in self.partipants_list
        ! Exits the process with status 1 if no participant was found
        ! Might raise an exception if the internet is slow
        """
        # * Sleep for a while in order to wait for the partipants list to load
        # ? Any better alternative
        time.sleep(self.sleep_time)
        self.driver.find_elements_by_class_name(
            "footer-button__button.ax-outline")[0].click()
        self.partipants_list = [
            name.get_attribute("aria-label").split()
            for name in self.driver.find_elements_by_class_name(
                "item-pos.participants-li")
        ]
        if not self.partipants_list:
            sys.exit(1)

    def leave_meeting(self):
        """
        * Leaves the ongoing meeting and closes the browser window
        * The browser and the service are shut down even if a button is missing
        """
        try:
            self.driver.find_element_by_class_name(
                "footer__leave-btn.ax-outline").click()
            self.driver.find_element_by_class_name(
                "zm-btn.zm-btn-legacy.zm-btn--primary.zm-btn__outline--blue"
            ).click()
        finally:
            try:
                self.driver.quit()
            finally:
                self.service.stop()

    def export_data(self):
        """
        * Returns the participant names as text, one name per line, sorted
        * Each row is cut one token before its "audio" token; rows without an
        * "audio" token are kept whole, empty rows are dropped
        * Rows ending in "(Me)" or "(Host)" are left out of the result
        * Side effect: self.partipants_list is replaced by the trimmed, sorted rows
        """
        trimmed_rows = []
        for row in self.partipants_list:
            if "audio" in row:
                # Drop the "audio" token, the token before it and the rest.
                row = row[0:max(row.index("audio") - 1, 0)]
            trimmed_rows.append(row)
        trimmed_rows.sort()
        self.partipants_list = trimmed_rows

        return "".join(
            " ".join(row) + "\n"
            for row in trimmed_rows
            if row and row[-1] not in ["(Me)", "(Host)"]
        )
class CookiesGenerator(object):
    """Base class that logs accounts in through a browser and stores their cookies."""

    def __init__(self, website='default'):
        """
        Set up the Redis clients and the browser.
        :param website: site name, used as the second argument of both RedisClient instances
        """
        self.website = website
        # Defined before anything that can fail so close()/__del__ always
        # find these attributes.
        self.browser = None
        self.driver_service = None
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)
        self.init_browser()

    def __del__(self):
        self.close()

    def init_browser(self):
        """
        Create the browser used for simulated logins, according to BROWSER_TYPE.
        Nothing is created when BROWSER_TYPE is neither 'PhantomJS' nor 'Chrome'.
        :return:
        """
        if BROWSER_TYPE == 'PhantomJS':
            # Copy so the shared DesiredCapabilities.PHANTOMJS dict is not mutated.
            caps = DesiredCapabilities.PHANTOMJS.copy()
            caps[
                "phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
            self.browser = webdriver.PhantomJS(desired_capabilities=caps)
            self.browser.set_window_size(1400, 500)
        elif BROWSER_TYPE == 'Chrome':
            # Use Google Chrome; chromedriver_path is the driver executable.
            self.driver_service = Service(executable_path=chromedriver_path)
            chrome_options = Options()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--no-sandbox')  # this option is important
            self.browser = webdriver.Chrome(chrome_options=chrome_options,
                                            executable_path=chromedriver_path)

    def new_cookies(self, username, password):
        """
        Generate fresh cookies; subclasses must override this.
        :param username: account name
        :param password: account password
        :return: as used by run(): an object whose get('status') is 1 (success)
                 or 2 (wrong password), with the payload under 'content'
        """
        raise NotImplementedError

    def process_cookies(self, cookies):
        """
        Convert browser cookies into a name -> value mapping.
        :param cookies: iterable of dicts that each have 'name' and 'value'
        :return: dict mapping cookie name to cookie value
        """
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def run(self):
        """
        Fetch all accounts and log in, one after another, every account that
        has no stored cookies yet.
        :return:
        """
        accounts_usernames = self.accounts_db.usernames()
        cookies_usernames = self.cookies_db.usernames()

        for username in accounts_usernames:
            if username in cookies_usernames:
                continue
            password = self.accounts_db.get(username)
            # NOTE(review): this prints the plaintext password to stdout.
            print('正在生成Cookies', '账号', username, '密码', password)
            result = self.new_cookies(username, password)
            status = result.get('status')
            if status == 1:
                # Cookies obtained.
                cookies = self.process_cookies(result.get('content'))
                print('成功获取到Cookies', cookies)
                if self.cookies_db.set(username, json.dumps(cookies)):
                    print('成功保存Cookies')
            elif status == 2:
                # Wrong password: remove the account.
                print(result.get('content'))
                if self.accounts_db.delete(username):
                    print('成功删除账号')
            else:
                print(result.get('content'))
        else:
            # The loop has no break, so this runs after every pass.
            print('所有账号都已经成功获取Cookies')

    def close(self):
        """
        Shut down the browser and the driver service.
        Safe to call repeatedly and on an instance whose browser was never created.
        :return:
        """
        print('Closing Browser')
        browser = getattr(self, 'browser', None)
        service = getattr(self, 'driver_service', None)
        self.browser = None
        self.driver_service = None
        try:
            if browser is None:
                print('Browser not opened')
            else:
                # quit() rather than close(): it also ends the driver process.
                browser.quit()
        except TypeError:
            print('Browser not opened')
        finally:
            if service is not None:
                service.stop()
class SeleniumDriverDispatcher:
    """Thin facade that forwards GUI automation requests to ``DriverCommands``.

    ``launch`` creates the underlying driver (starting a local driver service
    when no service URL is configured); every other public method delegates to
    ``DriverCommands`` / ``SeleniumElementFinder`` with that driver.
    """

    def __init__(self):
        self.__config = None
        self.__driver = None
        self.__proxy_server = None
        self.__proxy = None
        self.__driver_service = None

    def __create_gui_element_dispatcher(self, element):
        return SeleniumDriverElementDispatcher.create_dispatcher(self, element)

    @property
    def driver(self):
        """The driver created by ``launch`` (``None`` before that)."""
        return self.__driver

    @property
    def proxy(self):
        """The BrowserMob proxy created by ``launch``, or ``None``."""
        return self.__proxy

    def launch(self, config):
        """Start the browser described by ``config["arjuna_options"]``.

        When ``SELENIUM_SERVICE_URL`` is ``"not_set"`` a local driver service
        is started for the configured browser (downloading the driver first
        when ``SELENIUM_DRIVER_DOWNLOAD`` is true); otherwise the configured
        URL is used, with ``/wd/hub`` appended when missing.

        Raises ``Exception`` when a local service is requested for a browser
        other than Chrome or Firefox.
        """
        self.__config = config
        from .browser_launcher import BrowserLauncher

        arjuna_options = config["arjuna_options"]
        svc_url = arjuna_options["SELENIUM_SERVICE_URL"]
        driver_download = arjuna_options["SELENIUM_DRIVER_DOWNLOAD"]
        browser_name = arjuna_options["BROWSER_NAME"]
        driver_path = arjuna_options["SELENIUM_DRIVER_PATH"]

        if svc_url.lower() == "not_set":
            from arjuna.tpi.constant import BrowserName
            if browser_name == BrowserName.CHROME:
                from selenium.webdriver.chrome.service import Service as driver_service
                from webdriver_manager.chrome import ChromeDriverManager as driver_downloader
            elif browser_name == BrowserName.FIREFOX:
                from selenium.webdriver.firefox.service import Service as driver_service
                from webdriver_manager.firefox import GeckoDriverManager as driver_downloader
            else:
                raise Exception(
                    "Cannot start a local driver service for browser: {}".format(browser_name))

            if driver_download:
                driver_path = driver_downloader().install()
            # Instantiate the class selected above rather than whichever
            # `Service` name happened to be imported last.
            self.__driver_service = driver_service(driver_path)
            self.__driver_service.start()
            svc_url = self.__driver_service.service_url
        elif not svc_url.lower().endswith("/wd/hub"):
            svc_url += "/wd/hub"

        # BrowserMob
        from arjuna import Arjuna
        bmproxy_server = Arjuna._get_bmproxy_server()
        if bmproxy_server is not None:
            self.__proxy = bmproxy_server.create_proxy()

        self.__driver = BrowserLauncher.launch(config, svc_url=svc_url, proxy=self.__proxy)

    def quit(self):
        """Quit the driver, then stop the local service and close the proxy.

        The service and the proxy are released even when quitting the driver
        raises; the original error still propagates.
        """
        try:
            DriverCommands.quit(self.__driver)
        finally:
            try:
                if self.__driver_service:
                    self.__driver_service.stop()
            finally:
                if self.__proxy:
                    self.__proxy.close()

    def go_to_url(self, url):
        DriverCommands.go_to_url(self.__driver, url)

    def go_back_in_browser(self):
        DriverCommands.go_back_in_browser(self.__driver)

    def go_forward_in_browser(self):
        DriverCommands.go_forward_in_browser(self.__driver)

    def refresh_browser(self):
        DriverCommands.refresh_browser(self.__driver)

    def get_source(self):
        return DriverCommands.get_source(self.__driver)

    def send_keys(self, key_str):
        DriverCommands.send_keys(self.__driver, key_str)

    def execute_javascript(self, script, *args):
        """Run *script*; element dispatchers in *args* are unwrapped to their elements."""
        unwrapped_args = [
            arg.driver_element if isinstance(arg, SeleniumDriverElementDispatcher) else arg
            for arg in args
        ]
        return DriverCommands.execute_javascript(self.__driver, script, *unwrapped_args)

    def take_screenshot(self, file_path):
        DriverCommands.take_screenshot(self.__driver, file_path)

    def take_screenshot_as_base64(self):
        return DriverCommands.take_screenshot_as_base64(self.__driver)

    def find_element(self, with_type, with_value, *, relations=None, filters=None):
        """Return ``(1, dispatcher)`` for the element matched by the locator."""
        element = SeleniumElementFinder.find_element(
            self.__driver, with_type, with_value, relations=relations, filters=filters)
        return 1, self.__create_gui_element_dispatcher(element)

    def __process_single_js_element(self, element):
        """Validate one JavaScript result; raise unless it is a ``WebElement``."""
        # JS returns null, undefined
        if element is None:
            raise Exception("JavaScript could not find element.")
        if not isinstance(element, WebElement):
            raise Exception("JavaScript returned a non-element object.")
        return element

    def __process_js_element_list(self, elements):
        """Validate a non-empty list of JavaScript results."""
        if not elements:
            raise Exception("JavaScript could not find element.")
        return [self.__process_single_js_element(e) for e in elements]

    def __process_js_element(self, element):
        """Reduce a JavaScript result (element or list) to a single element."""
        if isinstance(element, list):
            return self.__process_js_element_list(element)[0]
        return self.__process_single_js_element(element)

    def __process_js_multielement(self, elements):
        """Normalise a JavaScript result (element or list) to a list of elements."""
        if isinstance(elements, list):
            return self.__process_js_element_list(elements)
        return [self.__process_single_js_element(elements)]

    def find_element_with_js(self, js):
        """Return ``(1, dispatcher)`` for the element produced by script *js*."""
        element = self.__process_js_element(self.execute_javascript(js))
        return 1, self.__create_gui_element_dispatcher(element)

    def find_multielement(self, with_type, with_value, *, relations=None, filters=None):
        """Return ``(count, MultiElement)`` for all elements matched by the locator."""
        web_elements = SeleniumElementFinder.find_elements(
            self.__driver, with_type, with_value, relations=relations, filters=filters)
        melement = MultiElement(
            [SeleniumDriverElementDispatcher(self, web_element) for web_element in web_elements])
        return melement.get_size(), melement

    def find_multielement_with_js(self, js):
        """Return ``(count, MultiElement)`` for the elements produced by script *js*."""
        web_elements = self.__process_js_multielement(self.execute_javascript(js))
        melement = MultiElement(
            [SeleniumDriverElementDispatcher(self, web_element) for web_element in web_elements])
        return melement.get_size(), melement

    def get_current_window_handle(self):
        return DriverCommands.get_current_window_handle(self.__driver)

    def get_current_window_title(self):
        return DriverCommands.get_window_title(self.__driver)

    def maximize_current_window(self):
        DriverCommands.maximize_window(self.__driver)

    def get_current_window_size(self):
        """Return the window size as ``{"width": ..., "height": ...}``."""
        res = DriverCommands.get_current_window_size(self.__driver)
        return {"width" : res[0], "height" : res[1]}

    def get_all_window_handles(self):
        return DriverCommands.get_all_winodw_handles(self.__driver)

    def focus_on_window(self, handle):
        DriverCommands.focus_on_window(self.__driver, handle)

    def close_current_window(self):
        DriverCommands.close_current_window(self.__driver)

    def is_web_alert_present(self):
        return DriverCommands.is_web_alert_present(self.__driver)

    def confirm_web_alert(self):
        DriverCommands.confirm_web_alert(self.__driver)

    def dismiss_web_alert(self):
        DriverCommands.dismiss_web_alert(self.__driver)

    def get_text_from_web_alert(self):
        return DriverCommands.get_text_from_web_alert(self.__driver)

    def send_text_to_web_alert(self, text):
        DriverCommands.send_text_to_web_alert(self.__driver, text)

    def focus_on_frame(self, element_dispatcher):
        DriverCommands.focus_on_frame(self.__driver, element_dispatcher.driver_element)

    def get_element_for_setu_id(self, id):
        # NOTE(review): __driver_elements is not assigned anywhere in this
        # class as visible here - confirm where it is populated.
        return self.__driver_elements[id]

    def focus_on_parent_frame(self):
        DriverCommands.focus_on_parent_frame(self.__driver)

    def focus_on_dom_root(self):
        DriverCommands.focus_on_dom_root(self.__driver)

    def perform_action_chain(self, action_chain):
        # Unlike the other commands, this one also receives the dispatcher itself.
        DriverCommands.perform_action_chain(self, self.__driver, action_chain)

    def hover_on_element(self, element_dispatcher):
        DriverCommands.hover_on_element(self.__driver, element_dispatcher.driver_element)

    def mouse_click_on_element(self, element_dispatcher):
        DriverCommands.mouse_click_on_element(self.__driver, element_dispatcher.driver_element)

    def double_click_on_element(self, element_dispatcher):
        DriverCommands.double_click_on_element(self.__driver, element_dispatcher.driver_element)

    def scroll_to_element(self, element_dispatcher):
        DriverCommands.scroll_to_element(self.__driver, element_dispatcher.driver_element)
browser.quit() continue # random body temperature input_fields = browser.find_elements_by_tag_name('input') for i in input_fields: if i.get_attribute("name").find("DZ_JSDTCJTW") >= 0: # scroll down until body temperature textfield is visible browser.execute_script("arguments[0].scrollIntoView();", i) i.click() i.send_keys(str(random.randint(362, 370) / 10.0)) break time.sleep(10) # save save_btn = browser.find_element_by_css_selector( "div#save.bh-btn.bh-btn-primary") time.sleep(5) save_btn.click() time.sleep(5) # confirm confirm_btn = browser.find_element_by_css_selector( "a.bh-dialog-btn.bh-bg-primary.bh-color-primary-5") confirm_btn.click() time.sleep(5) browser.quit() print("Successfully fill in!") driver_service.stop()
return True username = ['201xxx', '201xxx'] password = ['xxx', 'xxx'] province = ['河南省', '河南省'] city = ['郑州市', '郑州市'] receiver = ['@qq.com', '@163.com'] num = len(username) res = [] for i in range(num): res.append(dk(username[i], password[i], province[i], city[i])) driver.quit() service.stop() os.system('taskkill /im chromedriver.exe /F') os.system('taskkill /im chrome.exe /F') #sender_username为发件人的账号 sender_username = '******' #pwd为邮箱的授权码 pwd = 'xxx' #邮件的正文内容 fenxiang_img = get_jsciba() djt = get_djt() mail_content = f''' <p>{today_md}打卡完成</p> <img src="{fenxiang_img}"> <p>{djt}</p>
class Chrome(WebDriver):
    """
    WebDriver that launches a ChromeDriver service and drives Chrome through it.

    The ChromeDriver executable has to be downloaded separately from
    http://chromedriver.storage.googleapis.com/index.html
    """

    def __init__(self, executable_path="chromedriver", port=0,
                 options=None, service_args=None,
                 desired_capabilities=None, service_log_path=None,
                 chrome_options=None, keep_alive=True):
        """
        Start the ChromeDriver service, then open a session against it.

        :Args:
         - executable_path - location of the chromedriver executable; the
           default relies on it being reachable through $PATH
         - port - port for the service; 0 lets a free port be chosen
         - options - a ChromeOptions instance
         - service_args - list of extra arguments for the driver service
         - desired_capabilities - dict of non-browser-specific capabilities
           such as "proxy" or "loggingPref"
         - service_log_path - file the driver writes its log to
         - chrome_options - deprecated alias of ``options``
         - keep_alive - whether ChromeRemoteConnection uses HTTP keep-alive
        """
        if chrome_options:
            warnings.warn('use options instead of chrome_options',
                          DeprecationWarning, stacklevel=2)
            options = chrome_options

        if options is not None:
            option_capabilities = options.to_capabilities()
            if desired_capabilities is None:
                desired_capabilities = option_capabilities
            else:
                # Option-derived capabilities are merged into the dict that
                # was passed in.
                desired_capabilities.update(option_capabilities)
        elif desired_capabilities is None:
            # Neither options nor capabilities supplied: use the defaults.
            desired_capabilities = self.create_options().to_capabilities()

        self.service = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path)
        self.service.start()

        try:
            WebDriver.__init__(
                self,
                command_executor=ChromeRemoteConnection(
                    remote_server_addr=self.service.service_url,
                    keep_alive=keep_alive),
                desired_capabilities=desired_capabilities)
        except Exception:
            # The session could not be created: shut the service down again
            # before reporting the failure.
            self.quit()
            raise
        self._is_remote = False

    def launch_app(self, id):
        """Launch the Chrome app identified by ``id``."""
        payload = {'id': id}
        return self.execute("launchApp", payload)

    def get_network_conditions(self):
        """
        Return the current Chrome network emulation settings.

        :Returns:
            A dict, for example:

            {'latency': 4, 'download_throughput': 2, 'upload_throughput': 2,
            'offline': False}
        """
        response = self.execute("getNetworkConditions")
        return response['value']

    def set_network_conditions(self, **network_conditions):
        """
        Apply Chrome network emulation settings.

        :Args:
         - network_conditions: keyword arguments describing the conditions.

        :Usage:
            driver.set_network_conditions(
                offline=False,
                latency=5,  # additional latency (ms)
                download_throughput=500 * 1024,  # maximal throughput
                upload_throughput=500 * 1024)  # maximal throughput

            Note: 'throughput' can be used to set both (for download and upload).
        """
        payload = {'network_conditions': network_conditions}
        self.execute("setNetworkConditions", payload)

    def execute_cdp_cmd(self, cmd, cmd_args):
        """
        Run a Chrome DevTools Protocol command and return its result.

        Command names and arguments follow the protocol's domains/commands,
        see https://chromedevtools.github.io/devtools-protocol/

        :Args:
         - cmd: str, the command name
         - cmd_args: dict of command arguments; pass {} when there are none

        :Usage:
            driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId})

        :Returns:
            A dict ({} when the command yields nothing). For getResponseBody:

            {'base64Encoded': False, 'body': 'response body string'}
        """
        payload = {'cmd': cmd, 'params': cmd_args}
        response = self.execute("executeCdpCommand", payload)
        return response['value']

    def quit(self):
        """
        Close the browser and stop the ChromeDriver service started by
        ``__init__``.
        """
        try:
            WebDriver.quit(self)
        except Exception:
            # Errors while closing the session are ignored on purpose; the
            # service is stopped regardless.
            pass
        finally:
            self.service.stop()

    @staticmethod
    def create_options():
        """Return a fresh default ``Options`` instance."""
        return Options()
def register(country_code, email, password):
    """Log in to ais.usvisa-info.com and scrape the table on the fee page.

    A Selenium node is picked at random from ``node.txt``. When *email* has a
    cached session it is reused through the ``_yatri_session`` cookie,
    otherwise the sign-in form is submitted. The function then clicks
    "Continue" and "Pay Visa Fee" and reads the ``for-layout`` table.

    :param country_code: path segment used in every site URL.
    :param email: account e-mail; also the key into the module-level ``cache``.
    :param password: account password (only used for a fresh sign-in).
    :return: ``(result, session, schedule_id)`` where ``result`` is a list of
        ``[place, (year, month, day)]`` entries, with ``(0, 0, 0)`` for rows
        that carry no date; ``(None, None, None)`` when anything failed.

    Side effects: stores ``[session, schedule_id, group_id]`` in ``cache`` on
    a non-empty result and drops the cache entry otherwise; the browser
    session and the driver service are always released.
    """
    global cache
    # Bound up front so the cleanup below never sees an undefined name.
    driver = None
    c_service = None
    try:
        # Login
        c_service = Service("/usr/bin/chromedriver")
        c_service.command_line_args()
        c_service.start()

        with open("node.txt", "r") as node_file:
            selenium_list = [line.strip() for line in node_file if line.strip()]
        entry = random.choice(selenium_list)
        driver = webdriver.Remote(
            command_executor='http://%s:4444/wd/hub' % entry,
            desired_capabilities=chrome_options.to_capabilities())
        print("Choose Node:", entry)

        if email in cache:
            session, schedule_id, group_id = cache[email]
            new_session = change_region(country_code, session, group_id)
            driver.get("https://ais.usvisa-info.com")
            driver.add_cookie({
                'name': '_yatri_session',
                'value': new_session,
                'path': '/',
                'domain': 'ais.usvisa-info.com',
                'secure': True
            })
            driver.get("https://ais.usvisa-info.com/%s/niv/groups/%s" % (country_code, group_id))
        else:
            driver.get("https://ais.usvisa-info.com/%s/niv/users/sign_in" % country_code)
            email_box = driver.find_element_by_id("user_email")
            email_box.clear()
            email_box.send_keys(email)
            password_box = driver.find_element_by_id("user_password")
            password_box.clear()
            password_box.send_keys(password)
            driver.execute_script(
                "document.getElementById('policy_confirmed').click()")
            signin_button = driver.find_element_by_name("commit")
            signin_button.click()

        def wait_loading(xpath, option="locate"):
            """Wait for *xpath* to be present ("locate") or clickable ("clickable")."""
            if option == "locate":
                condition = EC.presence_of_element_located((By.XPATH, xpath))
            elif option == "clickable":
                condition = EC.element_to_be_clickable((By.XPATH, xpath))
            else:
                raise ValueError("unknown wait option: %s" % option)
            try:
                WebDriverWait(driver, wait_timeout).until(condition)
            except TimeoutException:
                print("Timed out waiting for page to load")
                driver.execute_script("window.scrollTo(0, 1080)")
                driver.save_screenshot("test.png")

        # Continue
        continue_button_xpath = "//a[contains(text(), 'Continue')]"
        wait_loading(continue_button_xpath)
        group_id = driver.current_url.split("/")[-1]
        driver.find_element_by_xpath(continue_button_xpath).click()

        # Choose action
        pay_button_xpath = "//a[contains(text(), 'Pay Visa Fee')]"
        wait_loading(pay_button_xpath)
        banner = driver.find_element_by_tag_name('h5')
        banner.click()
        wait_loading(pay_button_xpath, option="clickable")
        driver.find_element_by_xpath(pay_button_xpath).click()

        # Collect result
        title_xpath = "//h2[contains(text(), 'MRV Fee Details')]"
        wait_loading(title_xpath)
        time_table = driver.find_element_by_class_name('for-layout')
        result = []
        if time_table:
            for tr in time_table.find_elements_by_tag_name('tr'):
                tds = tr.find_elements_by_tag_name('td')
                if len(tds) != 2:
                    continue
                place = tds[0].text
                s = tds[1].text.split()
                year, month, day = 0, 0, 0
                if len(s) >= 3 and s[0] != "No":
                    # Date text ends in "<day> <month>, <year>".
                    day_str, month_str, year_str = s[-3], s[-2].replace(",", ""), s[-1]
                    year, month, day = int(year_str), g.MONTH[month_str], int(day_str)
                result.append([place, (year, month, day)])

        schedule_id = driver.current_url.split("/")[-2]
        session = driver.get_cookie("_yatri_session")["value"]

        if result:
            cache[email] = [session, schedule_id, group_id]
        else:
            # pop() instead of del: the account may never have been cached.
            cache.pop(email, None)
        return result, session, schedule_id
    except Exception as e:
        cache.pop(email, None)
        print(str(e))
        return None, None, None
    finally:
        # Release the session and the service on every path; a failure in one
        # cleanup step must not prevent the other.
        if driver:
            try:
                driver.quit()
            except Exception as e:
                print(str(e))
        if c_service:
            try:
                c_service.stop()
            except Exception as e:
                print(str(e))
def donghangcrawler(city1, city2, date_in):
    """Scrape economy-class flight listings from www.ceair.com.

    Drives a local Chrome instance: fills the departure city, arrival city
    and departure date on the home page, submits the search, switches to the
    result window and reads the summary and economy-price elements.

    Args:
        city1: Text typed into the departure-city field.
        city2: Text typed into the arrival-city field.
        date_in: Departure date as an 8-character ``YYYYMMDD`` string; it is
            re-formatted to ``YYYY-MM-DD`` before being typed.

    Returns:
        A list of dicts with the keys ``Airline``, ``FlightNumber``,
        ``dTime``, ``dAirport``, ``aTime``, ``aAirport`` and ``LowestPrice``,
        one per (summary, price) pair found. Empty when the result page
        contains no summaries or no prices.

    Raises:
        Any Selenium exception raised while driving the page, and
        ``ValueError`` / ``IndexError`` if a summary does not have the
        expected token layout. The browser and driver service are shut down
        before the exception propagates.
    """
    print("东方航空爬虫开始运行")
    resultlist = []
    dep_date = '-'.join((date_in[0:4], date_in[4:6], date_in[6:8]))

    c_service = Service('./webdriver/chromedriver.exe')
    c_service.command_line_args()
    browser = webdriver.Chrome(executable_path='./webdriver/chromedriver.exe')
    try:
        c_service.start()
        browser.get('http://www.ceair.com/')
        ad = browser.find_element_by_id("appd_wrap_close")
        citya = browser.find_element_by_id("label_ID_0")  # departure city
        cityb = browser.find_element_by_id("label_ID_1")  # arrival city
        date = browser.find_element_by_id("depDt")
        datex = browser.find_element_by_id("deptDtRt")
        search = browser.find_element_by_id("btn_flight_search")  # search button

        # Wait for the page to finish loading before interacting; acting too
        # early gets the session flagged as a robot.
        time.sleep(0.5)
        ad.click()  # close the ad overlay, which otherwise covers the form
        time.sleep(0.5)
        citya.clear()  # remove the pre-filled default
        time.sleep(0.5)
        citya.send_keys(city1)
        time.sleep(0.5)
        citya.send_keys(Keys.TAB)
        cityb.send_keys(city2)
        time.sleep(0.5)
        cityb.send_keys(Keys.TAB)
        time.sleep(0.5)
        # clear() cannot be used on the date field, so erase the default
        # value with ten backspaces and then type the new date.
        date.send_keys(*([Keys.BACKSPACE] * 10), dep_date)
        time.sleep(0.5)
        date.send_keys(Keys.TAB)  # dismiss the pop-up menu
        time.sleep(0.5)
        datex.send_keys(Keys.TAB)  # dismiss the calendar menu
        time.sleep(0.5)
        search.click()
        time.sleep(3)
        try:
            # The results open in a second window; switch to it.
            browser.switch_to.window(browser.window_handles[1])
        except Exception:
            # The second window has not appeared yet: click search once more
            # and retry. A second failure propagates to the caller.
            time.sleep(3)
            search.click()
            time.sleep(3)
            browser.switch_to.window(browser.window_handles[1])
        time.sleep(5)

        info = browser.find_elements_by_xpath(
            "//section[@class='summary']")  # flight summary elements
        price = browser.find_elements_by_xpath(
            "//dd[@data-type='economy']")  # economy price elements
        # find_elements_* returns a (possibly empty) list, never None, so
        # "no data" has to be detected by emptiness.
        if not info or not price:
            print("----东航--nodata----")
            return resultlist

        infolist = []
        for summary in info:
            # Tokenised summary text, e.g.
            # ['东方航空', '|', 'MU5104|直达|', '09:00', '首都国际机场', 'T2',
            #  '直达', '11:15', '虹桥国际机场', 'T2', '02小时15分钟']
            tokens = str(summary.text).strip().split()
            tokens.remove('|')
            tokens[1] = tokens[1].split('|')[0]  # keep only the flight number
            infolist.append(tokens)

        pricelist = []
        for price_box in price:
            text = str(price_box.text)
            # An empty price box is reported with the original sold-out label;
            # otherwise the second whitespace-separated token is the price.
            pricelist.append(text.split()[1] if text else '售完')

        for info_, price_ in zip(infolist, pricelist):
            resultlist.append({
                'Airline': info_[0],
                'FlightNumber': info_[1],
                'dTime': info_[2],
                'dAirport': info_[3],
                'aTime': info_[6],
                'aAirport': info_[7],
                'LowestPrice': price_,
            })
    finally:
        # Always release the browser and the driver service, including on the
        # early "no data" return and when scraping raises.
        browser.quit()
        c_service.stop()

    # Uncomment the block below when running the whole project:
    # with open('./data/donghang.csv', 'w', encoding='utf-8') as csvfile:
    #     writer = csv.writer(csvfile, delimiter=',')
    #     for i in resultlist:
    #         writer.writerow([city1, city2, i.get("Airline"), i.get('FlightNumber'), i.get('dAirport'),
    #                          i.get('aAirport'), i.get('dTime'), i.get('aTime'), i.get('LowestPrice'), '东方航空'])
    #     csvfile.close()
    print("东方航空爬虫运行结束")
    return resultlist