Exemple #1
0
def get_movie_url(url):
    """Collect the player iframe URL of every episode listed on a page.

    Episode links and titles are read from the HTML returned by
    ``ask_url(url)``. Each episode page is then opened in headless Chrome
    and the ``src`` attribute of the ``fed-play-iframe`` element is recorded.

    Args:
        url: Address of the page that lists the episodes.

    Returns:
        dict mapping each episode's link text to the iframe ``src`` found on
        that episode's page.
    """
    html = ask_url(url)
    doc = etree.HTML(html)
    # Anchor elements of every episode in the visible play list.
    episode_xpath = ('//div[@class="fed-play-item fed-drop-item fed-visible"]'
                     '//ul[@class="fed-part-rows"]/li/a')
    all_url = doc.xpath(episode_xpath + '/@href')
    all_title = doc.xpath(episode_xpath + '/text()')

    data_dict = {}
    # Selenium is used because the iframe src is read from the live page.
    c_service = Service('/usr/bin/chromedriver')
    c_service.command_line_args()
    c_service.start()
    try:
        option = webdriver.ChromeOptions()
        option.add_argument('--headless')
        option.add_argument('--no-sandbox')
        option.add_argument('--disable-dev-shm-usage')
        browser = webdriver.Chrome('/usr/bin/chromedriver', options=option)
        try:
            # Pair titles with links directly; the loop variable no longer
            # shadows the ``url`` parameter.
            for title, href in zip(all_title, all_url):
                browser.get('https://kuyun.tv' + href)
                frame = browser.find_element_by_id('fed-play-iframe')
                data_dict[title] = frame.get_attribute('src')
        finally:
            # Always release the browser, even if a page fails to load.
            browser.quit()
    finally:
        c_service.stop()
    return data_dict
Exemple #2
0
def punch(StudentId, Name):
    """Fill in and submit the form at http://xsc.sicau.edu.cn/SPCP.

    The text of the ``code-box`` element is read from the page and typed
    into ``codeInput`` together with the student id and name. After
    ``Submit`` and ``platfrom2`` are clicked, the function tries to tick
    ``ckCLS`` and press the button in ``SaveBtnDiv``; if that fails it clicks
    the link inside ``layui-layer1`` instead.

    Args:
        StudentId: Value typed into the ``StudentId`` field.
        Name: Value typed into the ``Name`` field.
    """
    c_service = Service('/Users/wq//Downloads/chromedriver')
    c_service.command_line_args()
    c_service.start()
    try:
        driver = webdriver.Chrome('/Users/wq/Downloads/chromedriver')
        try:
            driver.get('http://xsc.sicau.edu.cn/SPCP')
            # Elements are located by XPath on their id attribute.
            text = driver.find_element_by_xpath('//*[@id="code-box"]').text
            form_values = (
                ('StudentId', StudentId),
                ('Name', Name),
                ('codeInput', text),
            )
            for field_id, value in form_values:
                field = driver.find_element_by_xpath(
                    '//*[@id="{}"]'.format(field_id))
                field.click()
                field.send_keys(value)
            driver.find_element_by_xpath('//*[@id="Submit"]').click()
            driver.find_element_by_xpath('//*[@id="platfrom2"]').click()
            try:
                driver.find_element_by_xpath('//*[@id="ckCLS"]').click()
                driver.find_element_by_xpath(
                    '//*[@id="SaveBtnDiv"]/button').click()
            except Exception:
                # Fallback path; ``Exception`` rather than a bare except so
                # that KeyboardInterrupt/SystemExit are not swallowed.
                driver.find_element_by_xpath(
                    '//*[@id="layui-layer1"]/div[3]/a').click()
        finally:
            driver.quit()
    finally:
        c_service.stop()
class ChromeDriverManager(DriverManager):
    """Start a chromedriver service and open Chrome sessions against it.

    The chromedriver location, the start URL and the implicit wait are read
    from ``QEEnvironment.get_environment_dict()`` (keys ``BrowserPath``,
    ``URL`` and ``ImplicitWait``).
    """
    __chservice = None

    @staticmethod
    def _driver_path():
        """Return the chromedriver path built from this file's location."""
        cur_dir_path = os.path.dirname(os.path.realpath(__file__))
        # Keep the part of the directory that precedes the first '\base'
        # and append the configured relative driver path.
        return cur_dir_path.split(
            sep='\\base')[0] + QEEnvironment.get_environment_dict().get(
                'BrowserPath')

    @staticmethod
    def _add_common_arguments(chrome_option):
        """Apply the Chrome switches shared by every session; return the options."""
        chrome_option.add_argument("--disable-infobars")
        chrome_option.add_argument("--start-maximized")
        chrome_option.add_argument("--disable-popup-blocking")
        return chrome_option

    def launch_browser(self):
        """Open a local Chrome window on https://ui.cogmento.com/.

        NOTE(review): the created driver is neither stored nor returned, so
        the caller cannot close it -- confirm whether that is intended.
        """
        chrome_option = self._add_common_arguments(Options())
        chromedriver = self._driver_path()
        os.environ["webdriver.chrome.driver"] = chromedriver
        driver = webdriver.Chrome(chromedriver, options=chrome_option)
        driver.get('https://ui.cogmento.com/')

    def start_service(self):
        """Start the chromedriver service once; later calls are no-ops.

        Failures are reported through ``traceback.print_exc()`` and are not
        re-raised.
        """
        if self.__chservice is not None:
            return
        try:
            self.__chservice = Service(self._driver_path())
            self.__chservice.start()
            print('Service is started')
        except Exception:
            # print_exc() writes the traceback itself and returns None, so
            # it must not be wrapped in print().
            traceback.print_exc()

    def stop_service(self):
        """Stop the service if it was started and is still connectable."""
        if self.__chservice is not None and self.__chservice.is_connectable():
            print('Stop service')
            self.__chservice.stop()

    def create_driver(self):
        """Create a remote session on the running service and open the URL.

        Sets ``self.driver`` (raw remote driver) and ``self.edriver`` (the
        same driver wrapped in ``EventFiringWebDriver``). ``start_service``
        must have been called first, because the session is created against
        the service URL.
        """
        chrome_option = self._add_common_arguments(webdriver.ChromeOptions())
        # The capabilities come from the options only; the values previously
        # copied from DesiredCapabilities were overwritten before use.
        capabilities = chrome_option.to_capabilities()
        self.driver = webdriver.Remote(self.__chservice.service_url,
                                       desired_capabilities=capabilities)
        self.edriver = EventFiringWebDriver(self.driver, EventListener())
        environment = QEEnvironment.get_environment_dict()
        self.edriver.implicitly_wait(environment.get('ImplicitWait'))
        self.edriver.get(environment.get('URL'))
Exemple #4
0
def getcook():
    """Log in through headless Chrome and return the browser cookies.

    The login page is opened, the user name and password fields are cleared
    and filled, and the link with the text "登录" is clicked. Cookies are
    read immediately after the click.

    NOTE(review): captcha handling was present only as a branch that could
    never run (its condition was an empty list), so no captcha is handled.

    Returns:
        The value of ``driver.get_cookies()``, so that other code can reuse
        the session instead of logging in for every request.
    """
    loginurl = 'http://113.57.169.227:8088/ccps/login.jsp'  # login page
    path = r'd:\chromedriver.exe'

    c_service = Service(path)
    c_service.command_line_args()
    c_service.start()
    try:
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')

        driver = webdriver.Chrome(executable_path=path, options=chrome_options)
        try:
            driver.get(loginurl)
            # Clear each input before typing the credential into it.
            credentials = (('wcode', u'WHBK100'), ('password', u''))
            for field_id, value in credentials:
                field = driver.find_element_by_id(field_id)
                field.clear()
                field.send_keys(value)

            driver.find_element_by_link_text("登录").click()

            cookies = driver.get_cookies()
            time.sleep(1)
        finally:
            # Release the browser even if the page or an element is missing.
            driver.quit()
    finally:
        c_service.stop()
    return cookies
def get_urls(xingqi, A):
    """Collect betfair odds URLs for the matches whose label contains ``xingqi``.

    The live-score page is rendered in headless Chrome, scrolled twice so
    that lazily loaded rows appear, and then parsed with lxml.

    Args:
        xingqi: Substring looked for in the text of each row's third cell.
        A: Dict updated in place; every matching URL is stored as a key with
            the row's third-cell text as its value.

    Returns:
        list of the matching URLs, in page order.
    """
    # Original author's note: the background browser does not always shut
    # down cleanly, so the driver service is started and stopped explicitly.
    # Raw string: the path contains backslashes that are not escapes.
    c_service = Service(
        r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')
    c_service.command_line_args()
    c_service.start()
    try:
        opt = Options()
        opt.add_argument('--headless')
        browser = webdriver.Chrome(chrome_options=opt)
        try:
            wait = WebDriverWait(browser, 10)  # wait up to 10 s for elements
            browser.get('http://www.qqshidao.com/index.php?c=home&a=bifen')
            time.sleep(3)
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, '//*[@id="app"]/div[6]/div/span[6]')))
            submit.click()
            time.sleep(2)
            # A large scroll offset jumps straight to the bottom of the page.
            js = 'var q=document.documentElement.scrollTop=100000'
            # With many matches the first scroll only loads part of the
            # list, so scroll a second time to load the rest.
            browser.execute_script(js)
            time.sleep(1)
            browser.execute_script(js)
            time.sleep(2)

            yuanma = browser.page_source
        finally:
            browser.quit()
    finally:
        c_service.stop()

    urls = []
    rows = etree.HTML(yuanma).xpath(
        '//*[@id="app"]/div[7]/div/table/tbody/tr[@data-fid]')
    for each in rows:
        cell_text = each.xpath('./td[3]/text()')
        if not cell_text:
            # Row without text in its third cell: nothing to match against.
            continue
        xingqiji = cell_text[0]
        if xingqi in xingqiji:
            url = 'http://www.qqshidao.com/index.php?c=odds&a=betfair&fid={}'.format(
                each.attrib['data-fid'])
            A[url] = xingqiji
            urls.append(url)

    return urls
Exemple #6
0
def download_by_webdriver(url, charset='utf-8', proxy=None, user_agent=None):
    """Load ``url`` in headless Chrome and return the rendered page.

    Args:
        url: Address to open.
        charset: Encoding used to round-trip the page source; characters it
            cannot represent are dropped.
        proxy: Optional value for Chrome's ``proxy-server`` switch.
        user_agent: Optional User-Agent string; a desktop Chrome UA is used
            when it is empty or ``None``.

    Returns:
        ``(page_source, current_url)``, or ``(None, None)`` if anything
        fails; the error is printed, not raised.
    """
    print("[download_by_webdriver]: begin download the link %s" % url)
    p_content, current_url = None, None
    try:
        options = webdriver.ChromeOptions()
        # Headless mode.
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        # Chinese locale.
        options.add_argument('lang=zh_CN.UTF-8')
        if proxy:
            print("[download_by_webdriver]: use proxy %s" % proxy)
            options.add_argument('proxy-server=' + proxy)
        default_agent = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/71.0.3578.98 Safari/537.36')
        options.add_argument('user-agent=' + (user_agent or default_agent))

        c_service = Service('/usr/local/bin/chromedriver')
        c_service.command_line_args()
        c_service.start()
        try:
            driver = webdriver.Chrome(chrome_options=options)
            try:
                driver.get(url)
                driver.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight);")
                # NOTE(review): both timeouts are set after the page has
                # already been requested -- confirm that is intended.
                driver.implicitly_wait(10)
                driver.set_page_load_timeout(15)
                p_content = driver.page_source.encode(
                    charset, "ignore").decode(charset, 'ignore')
                current_url = driver.current_url
            finally:
                # Previously skipped on any error, leaving Chrome running.
                driver.quit()
        finally:
            c_service.stop()
    except Exception as e:
        print("[download_by_webdriver]:", e)
        p_content, current_url = None, None
    return p_content, current_url
Exemple #7
0
def download_by_webdriver(url, charset='utf-8'):
    """Load ``url`` in headless Chrome and return the rendered page.

    Args:
        url: Address to open.
        charset: Encoding used to round-trip the page source; characters it
            cannot represent are dropped.

    Returns:
        ``(page_source, current_url)``, or ``(None, None)`` if anything
        fails; the error is printed, not raised.
    """
    print("[download_by_webdriver]: begin download the link %s" % url)
    content, current_url = None, None
    try:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        c_service = Service('/usr/local/bin/chromedriver')
        c_service.command_line_args()
        c_service.start()
        try:
            driver = webdriver.Chrome(chrome_options=chrome_options)
            try:
                driver.get(url)
                driver.implicitly_wait(10)
                content = driver.page_source.encode(charset, "ignore").decode(
                    charset, 'ignore')
                current_url = driver.current_url
            finally:
                # Previously skipped on any error, leaving Chrome running.
                driver.quit()
        finally:
            c_service.stop()
    except Exception as e:
        print("[download_by_webdriver]:", e)
        content, current_url = None, None
    return content, current_url
Exemple #8
0
class SeleniumExampleTest(unittest.TestCase):
    """Logs in to a WordPress admin page through a chromedriver service."""

    def setUp(self):
        """Start chromedriver and open a remote session against it."""
        print("Setting up")
        self.__app_url = "http://192.168.56.101/wp-admin"
        self.__app_logout_url = "http://192.168.56.101/wp-login.php?action=logout"
        self.username = "******"
        self.password = "******"

        driver_path = ChromeDriverManager().install()
        self.svc = Service(driver_path)
        self.svc.start()
        # Registered as cleanups rather than in tearDown: tearDown is skipped
        # when setUp fails, which would leave the service running if the
        # session cannot be created. Cleanups run in reverse order, so the
        # driver quits before the service stops.
        self.addCleanup(self.svc.stop)
        self.driver = webdriver.Remote(self.svc.service_url)
        self.addCleanup(self.driver.quit)
        self.waiter = WebDriverWait(self.driver, 30)

    def tearDown(self):
        """Announce cleanup; the driver and service are released by cleanups."""
        print("Cleaning up")

    def test_wp_login(self):
        """Submit the login form, wait for the welcome text, then log out."""
        self.driver.get(self.__app_url)
        self.waiter.until(EC.presence_of_element_located((By.ID, "wp-submit")))
        element = self.waiter.until(
            EC.element_to_be_clickable((By.NAME, "log")))
        element.send_keys(self.username)
        element = self.waiter.until(
            EC.element_to_be_clickable((By.NAME, "pwd")))
        element.send_keys(self.password)
        element = self.waiter.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "input[type *= 'sub']")))
        element.click()
        self.waiter.until(
            EC.presence_of_element_located(
                (By.XPATH, "//div[contains(*//text(), 'Welcome')]")))
        # get() requires a URL; the otherwise unused logout URL is the only
        # candidate defined by this test.
        self.driver.get(self.__app_logout_url)
Exemple #9
0
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# The chromedriver binary is expected in <two directories above this file>/bin.
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(base_dir + '/bin')
driver_path = base_dir + '/bin/chromedriver'


# Start the chrome service.
chrome_service = Service(executable_path=driver_path)
chrome_service.command_line_args()
chrome_service.start()
try:
    # Create the driver.
    driver = webdriver.Chrome(driver_path)
    try:
        driver.implicitly_wait(30)
        driver.maximize_window()

        driver.get("http://www.linkedsee.com/")

        products = driver.find_elements_by_xpath(
            "//ul[@class='drop box-1']/li/a")
        print(products)

        print("found " + str(len(products)) + "products")

        for product in products:
            print(product.get_attribute('textContent'))
    finally:
        # Close the browser even when a lookup or page load fails.
        driver.quit()
finally:
    chrome_service.stop()
Exemple #10
0
class Spider:
    """Scrapes a NetEase Cloud Music top list and the comments of each song.

    Every song's comments are written to ``<song name>-评论信息.csv`` in the
    current working directory.
    """

    def __init__(self):
        # Raw string: the Windows path contains backslashes that are not
        # escape sequences.
        self.c_service = Service(
            r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
        )
        self.c_service.command_line_args()
        self.c_service.start()

        chrome_options = Options()
        chrome_options.add_argument('--headless')  # run without a window
        self.driver = webdriver.Chrome(chrome_options=chrome_options)
        self.url = "https://music.163.com/#/discover/toplist?id=3778678"

    def _enter_content_frame(self):
        """Wait up to 3 s for the ``g_iframe`` element and switch into it."""
        frame = WebDriverWait(self.driver, 3, 0.5).until(
            lambda driver: driver.find_element_by_id("g_iframe"))
        self.driver.switch_to.frame(frame)

    def find_allSong(self):
        """Print link, name, author and duration of every listed song.

        The comments of each song are scraped right after it is printed.
        """
        self._enter_content_frame()

        soup = BeautifulSoup(self.driver.page_source, "lxml")
        song_list = soup.find('tbody')  # table body holding all songs
        for each_song in song_list.find_all('tr'):
            each_info = each_song.find('span', class_="txt")
            long_time = each_song.find('span', class_="u-dur").text
            author = each_song.find('span',
                                    class_="icn icn-share")['data-res-author']
            link = "https://music.163.com/" + each_info.find('a')['href']
            name = each_info.find('b')['title']
            print('歌曲连接 : {}, 歌曲名 : {},作者 : {},时长 : {}'.format(
                link, name, author, long_time))
            self.comment(link, name)  # scrape the comments

    def comment(self, link, name):
        """Write every comment page of one song to ``<name>-评论信息.csv``.

        The song is opened in a second window; when no enabled "next page"
        button is left, that window is closed and the driver switches back
        to the first window.

        NOTE(review): fields are joined with plain commas and not escaped,
        so a comment that contains a comma or a newline spills into extra
        columns/rows.

        Args:
            link: URL of the song page.
            name: Song name, used for the file name and progress output.
        """
        self.driver.execute_script("window.open('%s')" % link)
        self.driver.switch_to.window(window_name=self.driver.window_handles[1])
        self._enter_content_frame()
        page = 0
        # The file is opened once and kept open for all pages instead of
        # being reopened for every single comment.
        with open(name + "-评论信息.csv", 'w', encoding="utf-8") as fp:
            fp.write('评论者,评论内容,评论日期' + '\n')

            while True:
                page += 1
                soup = BeautifulSoup(self.driver.page_source, "lxml")
                for each_comment in soup.find_all('div', class_='itm'):
                    comment_name = each_comment.find('a', class_='s-fc7').text
                    tmp = each_comment.find('div', class_='cnt f-brk').text
                    date = each_comment.find('div', class_='time s-fc4').text
                    # Strip the leading "<commenter>:" prefix from the text.
                    comment = tmp.replace(comment_name + ':', '')
                    fp.write(comment_name + ',' + comment + ',' + date + '\n')

                print("{}--第{}页.".format(name, page))
                try:
                    # An enabled "next page" button (not carrying js-disabled).
                    next_button = self.driver.find_element_by_xpath(
                        '//*[starts-with(@class,"zbtn znxt") and not(contains(@class,"js-disabled"))]'
                    )
                except Exception:
                    # No further page. Close the comment window itself and
                    # only then return to the first window; closing window 0
                    # here would leave the driver on a closed window for the
                    # next song.
                    print(self.driver.current_window_handle)
                    self.driver.close()
                    self.driver.switch_to.window(
                        window_name=self.driver.window_handles[0])
                    print(self.driver.current_window_handle)
                    return

                next_button.send_keys(Keys.ENTER)
                time.sleep(0.5)

    def run(self):
        """Open the top-list page, scrape everything, then quit the browser."""
        self.driver.get(self.url)
        self.find_allSong()
        self.driver.quit()  # end the browser process

    def __del__(self):
        # c_service does not exist if __init__ failed before assigning it.
        service = getattr(self, 'c_service', None)
        if service is not None:
            service.stop()
Exemple #11
0
class WebDriver(RemoteWebDriver):
    """
    Drives Chrome through a ChromeDriver process owned by this object.

    The ChromeDriver executable has to be downloaded separately from
    http://chromedriver.storage.googleapis.com/index.html
    """
    def __init__(self,
                 executable_path="chromedriver",
                 port=0,
                 chrome_options=None,
                 service_args=None,
                 desired_capabilities=None,
                 service_log_path=None):
        """
        Start the ChromeDriver service and open a session against it.

        :Args:
         - executable_path - path to the executable. With the default value
           the executable is expected to be on the $PATH
         - port - port the service should run on; when left as 0 a free port
           will be found.
         - chrome_options - an instance of ChromeOptions. Its capabilities
           are used as-is when no desired_capabilities are given, otherwise
           they are merged into the given dictionary.
         - service_args - forwarded to the Service.
         - desired_capabilities - dictionary object with non-browser specific
           capabilities only, such as "proxy" or "loggingPref". It is
           updated in place when chrome_options is also given.
         - service_log_path - forwarded to the Service as its log path.
        """
        if desired_capabilities is None:
            # Nothing supplied by the caller: derive everything from options.
            if chrome_options is None:
                source_options = self.create_options()
            else:
                source_options = chrome_options
            desired_capabilities = source_options.to_capabilities()
        elif chrome_options is not None:
            desired_capabilities.update(chrome_options.to_capabilities())

        service = Service(executable_path,
                          port=port,
                          service_args=service_args,
                          log_path=service_log_path)
        self.service = service
        service.start()

        try:
            connection = ChromeRemoteConnection(
                remote_server_addr=self.service.service_url)
            RemoteWebDriver.__init__(
                self,
                command_executor=connection,
                desired_capabilities=desired_capabilities)
        except:
            # Shut the freshly started service down again, then propagate.
            self.quit()
            raise
        self._is_remote = False

    def launch_app(self, id):
        """Launches Chrome app specified by id."""
        payload = {'id': id}
        return self.execute("launchApp", payload)

    def quit(self):
        """
        Close the browser, then stop the ChromeDriver executable that was
        started together with this driver.
        """
        try:
            RemoteWebDriver.quit(self)
        except:
            # The failure itself is irrelevant here; the service still has
            # to be stopped below.
            pass
        finally:
            self.service.stop()

    def create_options(self):
        """Return a fresh ``Options`` instance."""
        return Options()
Exemple #12
0
def AutoInfoSpider(url):
    """Scrape brand/model/price/picture data from ``url`` into auto_info.csv.

    The page is walked by letter group (1-24), brand, result page and model
    using absolute XPaths. Every model is printed and collected; the result
    is written to ``auto_info.csv`` (encoding ``utf_8_sig``) with the columns
    品牌名, 车型, 最低价, 最高价, 图片链接.

    Args:
        url: Address of the page that lists the brands.
    """
    brand_group_xpath = '/html/body/div[8]/div[1]/div[2]/div/div[{}]//a/div'
    brand_xpath = '/html/body/div[8]/div[1]/div[2]/div/div[{0}]/div[{1}]/a/div'
    page_xpath = '/html/body/div[8]/div[4]/div[5]/div/div/div/a'
    model_xpath = '/html/body/div[8]/div[4]/div[3]/div'

    # Start the driver service.
    c_service = Service(
        'C:/Users/gusisong/AppData/Local/Continuum/anaconda3/chromedriver.exe')
    c_service.command_line_args()
    c_service.start()
    try:
        # Browser configuration.
        option = webdriver.ChromeOptions()
        option.add_argument('--headless')
        option.add_argument('--disable-gpu')
        option.add_argument('--disable-images')
        option.add_argument('--disable-javascript')
        option.add_argument('--disable-plugins')
        option.add_argument('--no-sandbox')
        driver = webdriver.Chrome(options=option)
        try:
            driver.get(url)
            output = []

            for letter_index in range(1, 25):
                # Scrape brand by brand.
                brand_list = driver.find_elements_by_xpath(
                    brand_group_xpath.format(letter_index))
                for brand_index in range(2, len(brand_list) + 2):
                    brand = driver.find_element_by_xpath(
                        brand_xpath.format(letter_index, brand_index))
                    # Remember the brand name, then open its model listing.
                    brand_name = brand.text
                    brand.click()

                    # Scrape page by page.
                    page_list = driver.find_elements_by_xpath(page_xpath)
                    for page_index in range(1, len(page_list) + 1):
                        if page_index > 1:
                            driver.find_element_by_xpath(
                                '{}[{}]'.format(page_xpath, page_index)).click()

                        model_list = driver.find_elements_by_xpath(model_xpath)
                        for model_index in range(1, len(model_list) + 1):
                            model_link = '{}[{}]/a'.format(
                                model_xpath, model_index)
                            model_name = driver.find_element_by_xpath(
                                model_link + '/p[1]').text
                            price_range = driver.find_element_by_xpath(
                                model_link + '/p[2]').text

                            if '-' in price_range:
                                # Drop the trailing unit character, split the
                                # range and re-append the unit to both ends.
                                bounds = price_range[:-1].split('-')
                                min_price = bounds[0] + '万'
                                max_price = bounds[1] + '万'
                            elif '暂无' in price_range:
                                min_price = '暂无'
                                max_price = '暂无'
                            else:
                                min_price = price_range
                                max_price = price_range

                            pic_link = driver.find_element_by_xpath(
                                model_link + '/img').get_attribute("src")

                            print(brand_name, model_name, min_price, max_price,
                                  pic_link)
                            output.append([
                                brand_name, model_name, min_price, max_price,
                                pic_link
                            ])

            # Write the file. Passing the column names to the constructor
            # also works when nothing was scraped, whereas assigning
            # ``df.columns`` on an empty frame raises a length mismatch.
            df = DataFrame(output,
                           columns=['品牌名', '车型', '最低价', '最高价', '图片链接'])
            df.to_csv('auto_info.csv', encoding='utf_8_sig')
        finally:
            # Shut the browser down completely, even after a failure.
            try:
                driver.close()
            finally:
                driver.quit()
    finally:
        c_service.stop()
Exemple #13
0
class MyWebBoost():
    """Drives a headless Chrome session that clicks through ``target_url``.

    Retry bookkeeping lives in the module-level counters ``maxConnectTime``,
    ``maxLoadTime`` and ``maxRefreshTime``, which are defined outside this
    class and are never reset here.
    """

    def __init__(self, chromedriver_path, target_url):
        """Start headless Chrome through the chromedriver at ``chromedriver_path``.

        Args:
            chromedriver_path: Path handed to the selenium ``Service``.
            target_url: Page loaded by ``getUrl``.
        """
        chromedriver = chromedriver_path
        chrome_options = Options()
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        self.s = Service(chromedriver)
        self.driver = webdriver.Chrome(service=self.s, options=chrome_options)
        # Set both timeouts as a double safeguard.
        self.driver.set_page_load_timeout(30)
        self.driver.set_script_timeout(30)
        self.target_url = target_url

    # Step 0: establish the connection.
    def startConnect(self):
        """Load the target page, retrying recursively, then run ``startMyWork``."""
        global maxConnectTime

        # Give up and quit once the counter reaches 3.
        if maxConnectTime == 3:
            self.quit()
            return

        isSuccess = self.getUrl()

        # The page loaded successfully.
        if isSuccess:
            self.startMyWork()

        # Loading failed: count the attempt, wait one second and recurse.
        else:
            maxConnectTime = maxConnectTime + 1
            time.sleep(1)
            self.startConnect()

    # Step 1: try to open the URL.
    def getUrl(self):
        """Load ``self.target_url``.

        Returns:
            True if ``driver.get`` returned normally, False (after logging an
            error) if it raised.
        """
        try:
            self.driver.get(self.target_url)
            return True

        except Exception as e:
            logging.error(
                "------------ts.js.vnet.cn 首次加载失败!------------")
            return False

    # Step 2: run the click sequence.
    def startMyWork(self):
        """Run ``startClick`` and, if it reports a delay, sleep and retry."""

        global maxLoadTime

        # Give up and quit once the counter reaches 6 (the original comment
        # spoke of 3 attempts).
        if maxLoadTime == 6:
            self.quit()
            return

        # Perform the clicks.
        # NOTE(review): some paths through startClick end without a return
        # statement, which would make this unpacking fail -- confirm.
        flag, d_time = self.startClick()

        if flag:
            logging.error("操作成功!")
            self.quit()

            return
        else:
            # Not yet possible: sleep for the reported delay plus 5 seconds.
            maxLoadTime = maxLoadTime + 1
            logging.error("需要休眠:%s秒" % str(d_time))
            time.sleep(d_time + 5)
            # Reload the page, then try again.
            if self.getUrl():
                self.startMyWork()
            else:
                self.quit()
                return

    # Step 3: the click events.
    def startClick(self):
        """Click through the page and report whether the final click worked.

        Returns:
            ``(True, 0)`` when the ``btnExperiencesOK`` button was clicked;
            ``(False, seconds)`` when the ``OpenResMessage`` text was
            non-empty, with the delay computed by ``getDelayTime``. Other
            paths end without an explicit return.
        """
        global maxRefreshTime
        try:
            self.driver.get_screenshot_as_file('/root/startClick.png')
            # 1. Tick the agreement checkbox if it is displayed.
            readBtn = self.driver.find_elements(By.XPATH, '//*[@id="CheckAgree"]')[0]
            if readBtn.is_displayed():
                readBtn.click()

                time.sleep(1)
                self.driver.find_elements(By.XPATH, '//*[@id="ButtonAgree"]')[0].click()

                time.sleep(3)

                self.driver.find_elements(By.XPATH, '//div[@id="CmdBtn"]/div/input[2]')[0].click()

                time.sleep(5)
            # 2. Otherwise click the trial button directly.
            else:
                self.driver.find_elements(By.XPATH,
                                          '//div[@id="CmdBtn"]/div[1]/input[2]')[0].click()
                time.sleep(5)

        except Exception as e:
            # Count the failure; quit when the counter reaches 8, otherwise
            # reload the page and restart the work.
            # NOTE(review): execution continues with the second try block
            # below even after self.quit() or the nested startMyWork() call
            # -- confirm this is intended.
            maxRefreshTime = maxRefreshTime + 1
            if maxRefreshTime == 8:
                self.quit()
            else:
                if self.getUrl():
                    self.startMyWork()
                else:
                    self.quit()

        try:
            self.driver.get_screenshot_as_file('/root/clicked_try.png')
            # Check whether the previous period has expired: if it has, the
            # confirmation button exists and is clicked.
            time.sleep(5)
            self.driver.find_element(By.XPATH, '//input[@id="btnExperiencesOK"]').click()
            return True, 0

        except Exception as e:
            self.driver.get_screenshot_as_file('/root/clicked_fail.png')
            # Not expired yet: read the message that states the expiry time.
            text = self.driver.find_element(By.XPATH, '//p[@id="OpenResMessage"]').text
            # Example message: "亲,您今天的下行体验时间将于00点08分到期"
            logging.error(text)

            if len(text) > 0:
                delayTime = self.getDelayTime(text)
                return False, delayTime
            else:
                # Empty message: count the failure; quit at 10, otherwise
                # reload and restart. This branch returns nothing.
                maxRefreshTime = maxRefreshTime + 1
                if maxRefreshTime == 10:
                    self.quit()
                else:
                    if self.getUrl():
                        self.startMyWork()
                    else:
                        self.quit()

    # Compute the delay.
    @ staticmethod
    def getDelayTime(text):
        """Return the seconds between now and the expiry time named in ``text``.

        ``text`` must contain ``将于HH点MM分到期``; the time is combined with
        today's date and 59 seconds, then shifted by 240 seconds.

        Returns:
            The ``seconds`` component of the absolute difference between the
            current local time and the shifted expiry time.
        """

        logging.info("提醒时间:" + text)

        # Extract "HH:MM" from between '将于' and '分到期'.
        endTime = text.split('将于')[1].split("分到期")[0].replace('点', ':')

        nowDay = time.strftime("%Y-%m-%d ", time.localtime())

        # Full expiry timestamp: today's date + HH:MM + ":59".
        finalTime = nowDay + endTime + ":59"

        # Current timestamp.
        nowTime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

        d1 = datetime.datetime.strptime(nowTime, '%Y-%m-%d %H:%M:%S')
        d2 = datetime.datetime.strptime(finalTime, '%Y-%m-%d %H:%M:%S')

        # Original note: the server clock runs about 200 seconds behind real
        # time; 240 seconds are added here.
        d2 = d2 + datetime.timedelta(seconds=240)

        # Take the difference in whichever direction is positive.
        # NOTE(review): when the expiry time is already behind the current
        # time this yields the time elapsed since then, not the time until
        # the same clock time tomorrow -- confirm which is wanted.
        if d1 > d2:
            differSecond = (d1 - d2).seconds
        else:
            differSecond = (d2 - d1).seconds

        logging.info("当前的时间:" + nowTime)
        logging.info("到期的时间:" + finalTime)

        return differSecond

    # Shut down chromedriver.
    def quit(self):
        """Quit the driver, stop the service and kill leftover processes.

        The two shell pipelines send ``kill -9`` to every process whose
        ``ps -ef`` line contains ``chromedriver`` or ``chrome``, not only the
        ones started by this object.
        """
        self.driver.quit()
        self.s.stop()
        os.system('ps -ef|grep chromedriver|grep -v grep|awk \'{print $2}\'|xargs kill -9')
        os.system('ps -ef|grep chrome|grep -v grep|awk \'{print $2}\'|xargs kill -9')
Exemple #14
0
class CaesarReaderWindow(QMainWindow, Ui_myMainWindow):
    """Main window that reads queued articles in Chrome and redials PPPoE.

    The window wires the UI buttons to two background workers
    (``DriverThread`` and ``PPPOETask``).  Articles are opened in a
    mobile-emulated Chrome session and scrolled a random number of times;
    the broadband connection is dialled and hung up through ``win32ras``.
    """

    def __init__(self, parent=None):
        super(CaesarReaderWindow, self).__init__(parent)

        self.chromeDriverPath = os.path.abspath('chromedriver.exe')
        self.driver = None
        self.c_service = None
        self.startReadFlag = False
        self.stopReadFlag = False
        self.stopPPPOEFlag = False
        # Number of articles finished so far.
        self.readNum = 0
        # Number of articles still waiting to be read.
        self.unReadNum = 0
        self.driverThread = DriverThread(self)
        self.pppoeThread = PPPOETask(self)
        # Connect the worker signals exactly once.  Connecting on every
        # button click stacks duplicate connections, and each duplicate makes
        # the slot fire one more time per emitted signal.
        self.driverThread.trigger.connect(self.reconnect_pppoe)
        self.pppoeThread.trigger.connect(self.read_next)

        self.setupUi(self)
        self.connectPppoeBtn.clicked.connect(self.connect_pppoe_click)
        self.disconnectPppoeBtn.clicked.connect(self.disconnect_pppoe_click)
        self.saveConfigBtn.clicked.connect(self.save_config_click)
        self.startReadBtn.clicked.connect(self.start_read_click)
        self.stopReadBtn.clicked.connect(self.end_read_click)
        self.init_config()

    def init_config(self):
        """
        Load the global configuration on the shared Qt thread pool.
        :return:
        """
        try:
            initConfigTask = InitConfigTask(self)
            pool = QThreadPool.globalInstance()
            pool.start(initConfigTask)
        except Exception as e:
            self.print_log(str(e))

    def popen(self, cmd):
        """
        Run *cmd* and return its standard output as a list of decoded lines.
        :param cmd: command passed straight to ``subprocess.Popen``
        :return: list of GBK-decoded lines, or -1 if anything went wrong
        """
        try:
            # Drain stdout before waiting for the process: waiting first can
            # deadlock once the child fills the pipe buffer.  Leaving the
            # ``with`` block waits for the process to exit.
            with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc:
                lines = proc.stdout.readlines()
            return [line.decode('gbk') for line in lines]
        except Exception as e:
            self.print_log("获取ip异常")
            return -1

    def save_config_click(self):
        """
        Save the global configuration to ``config.ini`` as JSON.

        Nothing is written unless every field is filled in.
        :return:
        """
        startTime = self.startTimeEdit.text()
        endTime = self.endTimeEdit.text()
        pauseTimeFrom = self.pauseTimeFromEdit.text()
        pauseTimeTo = self.pauseTimeToEdit.text()
        slipTimesFrom = self.slipTimesFromEdit.text()
        slipTimesTo = self.slipTimesToEdit.text()
        pxFrom = self.pxFromEdit.text()
        pxTo = self.pxToEdit.text()
        chromeLocation = self.chromeLocationEdit.text()
        if not (startTime and endTime and pauseTimeFrom and pauseTimeTo
                and slipTimesFrom and slipTimesTo and pxFrom and pxTo
                and chromeLocation):
            return

        configObj = {
            'startTime': startTime,
            'endTime': endTime,
            'pauseTimeFrom': pauseTimeFrom,
            'pauseTimeTo': pauseTimeTo,
            'slipTimesFrom': slipTimesFrom,
            'slipTimesTo': slipTimesTo,
            'pxFrom': pxFrom,
            'pxTo': pxTo,
            'chromeLocation': chromeLocation
        }

        # The context manager flushes and closes the file.
        with open(os.path.abspath('config.ini'), 'w', encoding='utf-8') as f:
            f.write(json.dumps(configObj))

    def start_read_click(self):
        """
        Handle the "start reading" button: start the reader thread if idle.
        :return:
        """
        self.startReadFlag = True
        self.stopReadFlag = False
        try:
            if not self.driverThread.isRunning():
                self.driverThread.start()
        except Exception as e:
            self.print_log(str(e))

    def end_read_click(self):
        """
        Handle the "stop reading" button: flag the stop and close the browser.
        :return:
        """
        self.startReadFlag = False
        self.stopReadFlag = True
        self._shutdown_driver()

    def connect_pppoe_click(self):
        """
        Handle the "start dialling" button: start the PPPoE thread if idle.
        :return:
        """
        self.stopPPPOEFlag = False
        try:
            if not self.pppoeThread.isRunning():
                self.pppoeThread.start()
        except Exception as e:
            self.print_log(str(e))

    def disconnect_pppoe_click(self):
        """
        Handle the "stop dialling" button: hang up every active connection.
        :return:
        """
        self.stopPPPOEFlag = True

        data = self.check_for_broadband()
        if data is not None:
            self._disconnect_all(data)

    def read_next(self):
        """
        Read the next article unless reading or dialling has been stopped.
        :return:
        """
        if not self.startReadFlag:
            return

        if self.stopReadFlag or self.stopPPPOEFlag:
            return

        self.start_read_click()

    def reconnect_pppoe(self):
        """
        Redial PPPoE unless reading or dialling has been stopped.
        :return:
        """
        if self.stopReadFlag or self.stopPPPOEFlag:
            return
        self.connect_pppoe_click()

    def build_driver(self, url):
        """
        Open *url* in a fresh Chrome session, scroll through it, then record
        the article as read.

        The browser and the driver service are shut down even when opening or
        scrolling the page raises.
        :param url: article address; a falsy value means the queue is empty
        :return:
        """
        if not url:
            self.stopReadFlag = True
            self.print_log("已全部阅读完成\n")
            return

        self.readNum = self.readNum + 1
        self.print_log("开始阅读第 %d 篇,剩余 %d 篇" % (self.readNum, self.unReadNum))
        self.print_log("当前:%s" % url)

        try:
            self.c_service = Service(self.chromeDriverPath)
            self.c_service.command_line_args()
            self.c_service.start()
            self.driver = webdriver.Chrome(
                options=self._build_chrome_options())

            self.driver.get(url)
            hasNum = self._scroll_article()
        finally:
            self._shutdown_driver()

        if self.stopReadFlag:
            self.print_log("第 %d 篇阅未完成,共下滑 %d 次\n" % (self.readNum, hasNum))
            return

        self.print_log("第 %d 篇阅读完成,共下滑 %d 次\n" % (self.readNum, hasNum))
        self._mark_as_read(url)

    def _build_chrome_options(self):
        """Return the ChromeOptions for a mobile-emulated reading session."""
        mobileEmulation = {
            "deviceMetrics": {
                "width": 320,
                "height": 640,
                "pixelRatio": 3.0
            },
            "userAgent":
            'Mozilla/5.0 (Linux; Android 4.1.1; GT-N7100 Build/JRO03C) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/35.0.1916.138 Mobile Safari/537.36 T7/6.3'
        }
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--window-size=250,640')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--hide-scrollbars')
        chrome_options.add_argument('--disable-javascript')
        chrome_options.add_argument('--log-level=3')
        chrome_options.binary_location = self.chromeLocationEdit.text()
        chrome_options.add_experimental_option('mobileEmulation',
                                               mobileEmulation)
        chrome_options.add_experimental_option("excludeSwitches",
                                               ['enable-automation'])
        chrome_options.add_experimental_option('w3c', False)
        return chrome_options

    def _scroll_article(self):
        """Scroll the open page a random number of times; return the count done."""
        num = random.randint(int(self.slipTimesFromEdit.text()),
                             int(self.slipTimesToEdit.text()))
        hasNum = 0
        for n in range(num):
            if self.stopReadFlag:
                break
            holdTime = random.randint(int(self.pauseTimeFromEdit.text()),
                                      int(self.pauseTimeToEdit.text()))
            px = random.randint(int(self.pxFromEdit.text()),
                                int(self.pxToEdit.text()))
            self.print_log("第 %d 次下滑,等待 %d 秒, 下滑 %d 像素" %
                           (n + 1, holdTime, px))
            # Pause before each scroll.
            sleep(holdTime)
            # The stop button may have closed the browser during the pause.
            driver = self.driver
            if self.stopReadFlag or driver is None:
                break
            # Scroll by the distance that was just drawn and logged, not by
            # a fixed 200 pixels.
            TouchActions(driver).scroll(0, px).perform()
            hasNum = hasNum + 1
        return hasNum

    def _mark_as_read(self, url):
        """Drop the first line of unread.txt and append *url* to read.txt."""
        try:
            unreadPath = os.path.abspath('unread.txt')
            with open(unreadPath, 'r', encoding='utf-8') as f:
                content = f.readlines()
            # Rewrite the queue without its first line.
            with open(unreadPath, 'w', encoding='utf-8') as f:
                f.writelines(content[1:])

            with open(os.path.abspath('read.txt'), 'a', encoding='utf-8') as f:
                f.write(url)
        except Exception as e:
            print(e)

    def _shutdown_driver(self):
        """Quit the browser and stop the driver service, best effort."""
        # Detach the references first so a second caller finds nothing to do.
        driver, self.driver = self.driver, None
        if driver is not None:
            try:
                driver.quit()
            except Exception as msg:
                print(msg)

        service, self.c_service = self.c_service, None
        if service is not None:
            try:
                service.stop()
            except Exception as msg:
                print(msg)

    def _disconnect_all(self, connections):
        """Hang up every connection in *connections*, pausing 5 s after each."""
        for p in connections:
            self.show_ip_address()
            if self.disconnect_pppoe(p[0]) == "success":
                self.print_log("宽带%s已经断开" % p[1])
            sleep(5)

    def build_pppoe(self):
        """
        Cycle the PPPoE link: hang up if connections exist, otherwise dial.
        :return:
        """
        data = self.check_for_broadband()
        if data is not None:
            self._disconnect_all(data)
        else:
            try:
                pid, res = self.dial_broadband()
                if res == 0:
                    self.show_ip_address()
                sleep(5)
            except Exception as ee:
                print(ee)

    def print_log(self, message):
        """
        Queue *message* for logging on the shared Qt thread pool.
        :param message: log text
        :return:
        """
        try:
            logTask = LogTask(self, message)
            pool = QThreadPool.globalInstance()
            pool.start(logTask)
        except Exception as msg:
            print(msg)

    def setUnReadNum(self, num):
        """Store the number of articles still waiting to be read."""
        self.unReadNum = num

    def connect_pppoe(self, dialname, account, passwd):
        """Dial the named entry through ``win32ras.Dial`` and return its result."""
        dial_params = (dialname, '', '', account, passwd, '')
        return win32ras.Dial(None, None, dial_params, None)

    def dial_broadband(self):
        """
        Dial the broadband connection, retrying every 3 seconds on failure.

        Retries run in a loop rather than by recursion, so a long outage
        cannot exhaust the call stack.
        :return: ``(handle, 0)`` on success, ``(-1, -1)`` when dialling fails
                 after a stop was requested or an exception occurs
        """
        while True:
            self.pppoeStatusLbl.setText("正在拨号...")
            dialname = '宽带连接'  # just a name
            account = self.accountEdit.text()
            passwd = self.passwordEdit.text()
            self.print_log("正在拨号")
            try:
                # handle identifies the connection for later hang-up; result
                # is 0 on success.  account/passwd are the ISP credentials.
                handle, result = self.connect_pppoe(dialname, account, passwd)
                if result == 0:
                    self.print_log("拨号成功")
                    self.pppoeStatusLbl.setText("拨号成功")
                    return handle, result
                if self.stopPPPOEFlag:
                    self.print_log("拨号失败")
                    self.pppoeStatusLbl.setText("拨号失败")
                    return -1, -1
                self.print_log("拨号失败,3秒后重试")
                self.pppoeStatusLbl.setText("正在重试")
                sleep(3)
            except Exception as e:
                self.print_log("拨号异常" + str(e))
                return -1, -1

    def disconnect_pppoe(self, handle):
        """
        Hang up the connection identified by *handle*.

        A failed hang-up is retried every 3 seconds until it succeeds; the
        retry is a loop rather than recursion.
        :param handle: connection handle, or None
        :return: "success", or "fail" when *handle* is None
        """
        while True:
            self.print_log("正在断开宽带!")
            self.pppoeStatusLbl.setText("正在断开")
            if handle is None:
                self.print_log("宽带断开异常")
                self.pppoeStatusLbl.setText("断开失败")
                return "fail"
            try:
                win32ras.HangUp(handle)
                self.print_log("宽带断开成功!")
                self.pppoeStatusLbl.setText("断开成功")
                return "success"
            except Exception as e:
                self.print_log("宽带断开失败,3秒后重试")
                self.pppoeStatusLbl.setText("断开失败")
                sleep(3)

    def check_for_broadband(self):
        """Return the active connections from win32ras, or None if there are none."""
        connections = win32ras.EnumConnections()
        if len(connections) == 0:
            self.print_log("系统未运行任何宽带连接")
            return
        else:
            self.print_log("系统正在运行%d个宽带连接" % len(connections))
            return connections

    def show_ip_address(self):
        """Parse ``ipconfig`` output and display the broadband IPv4 address."""
        self.print_log("正在查询IP")
        self.pppoeIpLbl.setText("")

        ipconfig_result_list = self.popen('ipconfig')
        if ipconfig_result_list == -1:
            return

        ip_str = None
        # True while scanning the lines that follow the "宽带连接" heading.
        in_ppp_section = False
        for line in ipconfig_result_list:
            if "宽带连接" in line:
                in_ppp_section = True

            if in_ppp_section and line.strip().startswith("IPv4 地址"):
                ip_str = line.split(":")[1].strip()
                in_ppp_section = False

        if ip_str is not None:
            self.print_log("IP地址为: " + ip_str)
            self.pppoeIpLbl.setText(ip_str)
Exemple #15
0
class ICloud(object):
    """Drive one iCloud "Find My Friends" browser session and record locations.

    The object logs in through Selenium, listens to the page's network
    traffic through pychrome, and stores every location found in a
    ``refreshClient`` response via :meth:`save_model`.  A timer chain keeps
    the page refreshed until the object is deleted.
    """

    # Timeout handed to Selenium for page loads and explicit waits.
    TIMEOUT = 100

    id = None
    account = None
    browser = None
    chrome = None
    wait = None
    tab = None
    deleted = False
    # Defined on the class and never rebound per instance, so every ICloud
    # object shares this set of already-checked entity uids.
    mapping = set()
    c_service = None

    def __init__(self):
        gc.collect()
        # NOTE(review): ids are drawn from 0..100, so two live instances can
        # collide and share ./logs/<id>.log and ./logs/<id>.cookies.
        self.id = random.randint(0, 100)
        self.c_service = Service('chromedriver')
        self.c_service.command_line_args()
        self.c_service.start()
        self.start_browser()
        logger.info('Start a browser.')

    def __wait_for_visible(self, xpath):
        """Block until the element at *xpath* is visible and return it."""
        return self.wait.until(
            expected_conditions.visibility_of_element_located(
                (By.XPATH, xpath)))

    def run_login(self, account, password):
        """Submit *account* and *password* on the iCloud login page.

        :return: True when the verification-code inputs appear, False if
            they do not show up before the wait times out.
        """
        logger.info('"{account}" is logging in.'.format(account=account))
        self.account = account
        # Get the login page.
        self.browser.get('https://www.icloud.com/#fmf')
        auth_frame = self.__wait_for_visible('//*[@id="auth-frame"]')
        self.browser.switch_to.frame(auth_frame)
        logger.info('Login page is loaded.')

        # Process input: account name and password.
        remember_me_input = self.browser.find_element_by_xpath(
            '//*[@id="remember-me"]')
        remember_me_input.click()

        account_name_text_field = self.browser.find_element_by_xpath(
            '//*[@id="account_name_text_field"]')
        account_name_text_field.send_keys(account)
        account_name_text_field.send_keys(Keys.RETURN)

        password_text_field = self.__wait_for_visible(
            '//*[@id="password_text_field"]')
        password_text_field.send_keys(password)
        password_text_field.send_keys(Keys.RETURN)

        try:
            # Wait until the code controls are visible.
            self.__wait_for_visible('//*[@id="char0"]')
            return True
        except TimeoutException:
            # Login failed.
            return False

    def run_codes(self, codes):
        """Type the six verification *codes* and start listening on success.

        :param codes: indexable of six characters, one per input box.
        :return: True when the "trust browser" button appeared and was
            clicked, False if it did not show up before the wait timed out.
        """
        # Write one code character into each control.
        for i in range(6):
            char = self.browser.find_element_by_xpath(
                '//*[@id="char{i}"]'.format(i=i))
            char.send_keys(codes[i])

        try:
            # Click trust button
            trust_browser = self.__wait_for_visible(
                '//*[starts-with(@id, "trust-browser-")]')
            trust_browser.click()
        except TimeoutException:
            # Codes were incorrect.
            return False

        self.save_cookies()
        self._start_network_listening()
        return True

    def _start_network_listening(self):
        """Attach the response listener to the last tab and arm auto refresh."""
        logger.info('Start network listening...')
        self.tab = self.chrome.list_tab()[-1]
        self.tab.Network.responseReceived = self.response_received
        self.tab.start()
        self.tab.Network.enable()
        # Start auto refresh.
        Timer(60, self.auto_refresh).start()

    def save_cookies(self):
        """Dump the browser cookies to ./logs/<id>.cookies as JSON."""
        cookies = self.browser.get_cookies()
        jsonCookies = json.dumps(cookies)
        with open('./logs/{id}.cookies'.format(id=self.id), 'w') as f:
            f.write(jsonCookies)

    def load_cookies(self):
        """Replace the browser cookies with those saved by save_cookies()."""
        self.browser.delete_all_cookies()
        with open('./logs/{id}.cookies'.format(id=self.id), 'r') as f:
            listCookies = json.loads(f.read())
        for cookie in listCookies:
            self.browser.add_cookie({
                'domain': cookie['domain'],
                'name': cookie['name'],
                'value': cookie['value'],
                'path': '/',
                'expires': None
            })
        logger.info('Browser cookies loaded.')

    def start_browser(self):
        """Launch Chrome and connect pychrome to its DevTools endpoint.

        :raises ConnectionResetError: if Chrome fails to start ten times.
        :raises Exception: if no DevTools URL is found in the driver log.
        """
        # Start chromedriver
        options = webdriver.ChromeOptions()
        options.add_argument('--disable-background-networking=false')
        options.add_argument('--no-sandbox')
        log_path = './logs/{id}.log'.format(id=self.id)
        retry_count = 0
        while True:
            try:
                self.browser = webdriver.Chrome(
                    chrome_options=options,
                    service_args=['--verbose', '--log-path=' + log_path])
                break
            except ConnectionResetError:
                retry_count += 1
                if retry_count >= 10:
                    raise
        self.browser.set_page_load_timeout(self.TIMEOUT)
        self.wait = WebDriverWait(self.browser, self.TIMEOUT)
        # Get the debug url from the verbose driver log.
        url = None
        with open(log_path, 'r') as log:
            for line in log:
                if 'DevTools request: http://localhost' in line:
                    url = line[line.index('http'):].replace(
                        '/json/version', '').strip()
                    break
        if not url:
            raise Exception('Invalid protocol url.')
        # Start pychrome
        self.chrome = pychrome.Browser(url=url)

    def restart_browser(self):
        """Replace the browser with a fresh one and restore the session.

        The page load plus cookie restore is attempted up to five times
        before the last exception is re-raised.
        """
        try:
            if self.browser:
                self.browser.close()
                self.browser.quit()
        except Exception:
            # The old browser may already be dead; a new one is started anyway.
            pass
        self.start_browser()
        logger.info('Browser restarted.')
        retry = 0
        while True:
            try:
                self.browser.get('https://www.icloud.com/#fmf')
                self.load_cookies()
                self.browser.refresh()
                break
            except Exception:
                retry += 1
                if retry >= 5:
                    raise
        self._start_network_listening()

    def auto_refresh(self):
        """Click through the friend list to trigger fresh location requests.

        Re-arms itself every 60 seconds; on a WebDriver error it hands over
        to refresh_page().  Does nothing once the object is marked deleted.
        """
        if self.deleted:
            return
        try:
            self.browser.switch_to.default_content()
            frame = self.__wait_for_visible('//*[@id="fmf"]')
            self.browser.switch_to.frame(frame)

            nearby = self.__wait_for_visible(
                '/html/body/div[2]/div/div/div[2]/div[1]/div/div[3]/div[1]/div[1]'
            )
            friends = self.browser.find_elements_by_xpath(
                '/html/body/div[2]/div/div/div[2]/div[1]/div/div[3]/div[1]/div[not(contains(@class, "nearby"))]'
            )
            for friend in friends:
                friend.click()
            nearby.click()
            self.save_cookies()
            Timer(60, self.auto_refresh).start()
        except WebDriverException as e:
            logger.error(e.args)
            self.refresh_page()

    def refresh_page(self, retry=1):
        """Reload the page, escalating to a browser restart after 5 tries.

        :param retry: attempt counter, incremented on every failed reload.
        """
        if self.deleted:
            return
        if retry >= 5:
            logger.error('SERVICE DOWN! restarting...')
            try:
                self.restart_browser()
            except Exception as e:
                # Tolerate an account that is already gone so the alert
                # mail below is still sent.
                ICLOUD_DICT.pop(self.account, None)
                app.mail.send(
                    'FMF: SERVICE DOWN',
                    '<p>{account} unavailable, login again.</p><p>{e}</p>'.
                    format(account=self.account, e=e.args),
                    img='logs/{id}.png'.format(id=self.id))
            return
        logger.info('REFRESHING...')
        try:
            self.browser.save_screenshot('logs/{id}.png'.format(id=self.id))
            self.browser.refresh()
            Timer(60, self.auto_refresh).start()
        except Exception as e:
            logger.error(e.args)
            retry += 1
            Timer(10, self.refresh_page, [retry]).start()

    @staticmethod
    def _contact_name(contact):
        """Join the non-empty first/middle/last name parts with single spaces."""
        parts = (contact.get('firstName'), contact.get('middleName'),
                 contact.get('lastName'))
        return ' '.join(part.strip() for part in parts
                        if part and part.strip())

    @staticmethod
    def _format_address(address):
        """Render an address dict as one line; None becomes 'UNKNOWN'."""
        if address is None:
            return 'UNKNOWN'
        if 'formattedAddressLines' in address:
            return ' '.join(address['formattedAddressLines'])
        return '{streetAddress} {locality} {administrativeArea}'.format(
            streetAddress=address['streetAddress'],
            locality=address['locality'],
            administrativeArea=address['administrativeArea'])

    def response_received(self, **kwargs):
        """Network listener: store locations found in refreshClient responses.

        Expects the ``response`` and ``requestId`` keyword arguments of the
        ``Network.responseReceived`` event.  Responses whose URL does not
        contain "refreshClient", or whose body cannot be fetched, are ignored.
        """
        response = kwargs.get('response') or {}
        request_id = kwargs.get('requestId')
        if 'refreshClient' not in (response.get('url') or ''):
            return
        try:
            content = self.tab.Network.getResponseBody(
                requestId=request_id)['body']
        except pychrome.CallMethodException:
            return
        logger.info('{request_id}: {content}'.format(request_id=request_id,
                                                     content=content))
        obj = json.loads(content)
        if 'locations' not in obj:
            return

        # Map contact id -> display name.
        contacts = {
            contact['id']: self._contact_name(contact)
            for contact in obj.get('contactDetails') or []
        }
        for loc in obj['locations']:
            location = loc['location']
            if location is None:
                continue
            uid = loc['id']
            self.save_model({
                'locid': location['locationId'],
                'account': self.account,
                'uid': uid,
                # A location without matching contact details is still kept.
                'name': contacts.get(uid, 'UNKNOWN'),
                # The payload timestamp is divided by 1000 to get seconds.
                'time': location['timestamp'] / 1000.0,
                'accuracy': location['horizontalAccuracy'],
                'latitude': location['latitude'],
                'longitude': location['longitude'],
                'address': self._format_address(location['address'])
            })

    def save_model(self, obj):
        """Persist one location and push it to the YingYan track service.

        The entity for ``obj['uid']`` is looked up (and added when the lookup
        reports a non-zero status) the first time the uid is seen.  A
        location whose ``locid`` is already stored is skipped entirely.

        :param obj: dict with the keys locid, account, uid, name, time,
            accuracy, latitude, longitude and address.
        """
        if obj['uid'] not in self.mapping:
            res = requests.get('http://yingyan.baidu.com/api/v3/entity/list',
                               params={
                                   'ak':
                                   BMAP_AK,
                                   'service_id':
                                   YINGYAN_ID,
                                   'filter':
                                   'entity_names:{uid}'.format(uid=obj['uid'])
                               })
            jo = json.loads(res.text)
            if jo['status'] != 0:
                res = requests.post(
                    'http://yingyan.baidu.com/api/v3/entity/add',
                    data={
                        'ak': BMAP_AK,
                        'service_id': YINGYAN_ID,
                        'entity_name': obj['uid'],
                        'entity_desc': obj['name']
                    })
                logger.info('YingYan ADD entity: {res}'.format(res=res.text))
            self.mapping.add(obj['uid'])
        if Location.objects.filter(locid=obj['locid']):
            return
        try:
            Location.objects.create(locid=obj['locid'],
                                    account=obj['account'],
                                    uid=obj['uid'],
                                    name=obj['name'],
                                    time=datetime.datetime.fromtimestamp(
                                        obj['time']),
                                    accuracy=obj['accuracy'],
                                    latitude=obj['latitude'],
                                    longitude=obj['longitude'],
                                    address=obj['address'])
        except IntegrityError:
            # Another writer stored the same location first; still push the
            # point below.
            pass
        res = requests.post('http://yingyan.baidu.com/api/v3/track/addpoint',
                            data={
                                'ak': BMAP_AK,
                                'service_id': YINGYAN_ID,
                                'entity_name': obj['uid'],
                                'latitude': obj['latitude'],
                                'longitude': obj['longitude'],
                                'loc_time': int(obj['time']),
                                'radius': obj['accuracy'],
                                'coord_type_input': 'wgs84',
                                'address': obj['address']
                            })
        logger.info('YingYan ADD point: {res}'.format(res=res.text))

    def __del__(self):
        """Stop the timer chain and release the browser and driver service."""
        self.deleted = True
        try:
            if self.browser:
                self.browser.quit()
        except Exception as e:
            logger.error(e.args)
        # c_service stays None when __init__ failed before creating it.
        if self.c_service is not None:
            try:
                self.c_service.stop()
            except Exception as e:
                logger.error(e.args)
        logger.info('A browser is Closed.')
Exemple #16
0
                user32.TranslateMessage(ctypes.byref(msg))
                user32.DispatchMessageA(ctypes.byref(msg))
        finally:
            del msg
            user32.UnregisterHotKey(None, 98)
            user32.UnregisterHotKey(None, 99)


if __name__ == "__main__":
    # Bind the name up front: if constructing the service raises, the
    # cleanup below must not fail on an undefined variable.
    c_service = None
    try:
        c_service = Service("chromedriver.exe")
        c_service.command_line_args()
        c_service.start()
    except WebDriverException as e:
        # The run continues without the service, as before.
        logging.error("Failed to start broswer service")

    try:
        init_log_config()
        # Listen for hot keys on a daemon thread so it never blocks exit.
        thread_hotKey = HotKey("thread_hotKey")
        thread_hotKey.setDaemon(True)
        thread_hotKey.start()
        print("The thread listening event from keyboard is running...")

        driver = Driver()
        driver.run()
    except KeyboardInterrupt as e:
        logging.warning("Handle KeyboardInterrupt.")
        os.sys.exit(1)
    finally:
        # Stop the service on every exit path, not only on Ctrl-C.
        if c_service is not None:
            c_service.stop()
Exemple #17
0
class ChromeDriverAdapter:
    """Own one chromedriver service and hand out Chrome sessions on it.

    Every session gets its own temporary user-data directory.  The sessions
    and directories created here are remembered so that :meth:`purge` can
    quit and delete them before stopping the service.
    """

    def __init__(self, console: Console) -> None:
        """Start the bundled chromedriver for the current platform.

        :param console: object whose ``exception()`` method is called to
            report errors.
        """
        self.console = console
        self.service = Service((os.path.dirname(__file__) or '.') + ('/bin/%s/chromedriver' % system()) )
        self.drivers = []
        self.tempfolder = []
        self.service.start()

    def purge(self) -> None:
        """Quit every tracked driver, delete temp folders, stop the service."""
        for driver in self.drivers:
            try:
                driver.quit()
            except Exception:
                # Best effort: the session may already be closed.
                pass
        for folder in self.tempfolder:
            shutil.rmtree(path=folder, ignore_errors=True)

        self.service.stop()

    def getNewInstance(self, uid, user, proxy=None, headless=False) -> WebDriver:
        """Create a Chrome session configured for *user*.

        :param uid: not used by this method.
        :param user: mapping that may contain 'proxy', 'windowSize' and
            'userAgent' entries.
        :param proxy: proxy description passed to ``buildChromeExtension``;
            falls back to ``user['proxy']`` when omitted.
        :param headless: run Chrome with ``--headless`` when true.
        :return: the new driver, or None if anything failed (the error is
            reported through ``console.exception()``).
        """
        pluginfile = None
        driver = None
        try:
            if proxy is None:
                if 'proxy' in user:
                    proxy = user['proxy']

            options = Options()
            options.add_argument('--no-sandbox')
            options.add_argument("--disable-dev-shm-usage")

            # NOTE(review): 'media.eme.enabled' looks like a Firefox
            # preference name rather than a Chrome switch -- confirm it has
            # any effect here.
            options.add_argument('media.eme.enabled')
            options.add_argument("--disable-gpu")
            options.add_argument('--disable-popup-blocking')
            options.add_argument("--disable-blink-features")
            options.add_argument("--disable-blink-features=AutomationControlled")

            options.add_argument('--ignore-certificate-errors-spki-list')
            options.add_argument('--ignore-certificate-errors')
            options.add_argument('--ignore-ssl-errors')

            options.add_experimental_option("excludeSwitches", ["enable-automation"])
            options.add_experimental_option('useAutomationExtension', False)

            # Fresh profile per session; removed again by purge().
            userDataDir = mkdtemp()
            self.tempfolder.append(userDataDir)
            options.add_argument('--user-data-dir=%s' % userDataDir)

            if 'windowSize' in user:
                options.add_argument("--window-size=%s" % user['windowSize'])

            if headless:
                options.add_argument("--headless")

            # Set user agent if available
            if 'userAgent' in user:
                options.add_argument('user-agent=%s' % user['userAgent'])

            # Do NOT add "--incognito": it disables the use of the proxy.

            desired_capabilities = DesiredCapabilities.CHROME.copy()

            # Add a proxy if available.
            if proxy:
                pluginfile = self.buildChromeExtension(proxy)
                options.add_extension(pluginfile)

            # Instantiate the driver on the shared service.
            driver = webdriver.Remote(self.service.service_url, desired_capabilities=desired_capabilities, options=options)

            # Make navigator.webdriver report undefined.
            script = '''
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            })
            '''
            driver.execute_script(script)

            # Track the session so purge() can quit it.
            self.drivers.append(driver)
            return driver
        except Exception:
            self.console.exception()
            # Do not leak a session that was created before the failure.
            if driver is not None:
                try:
                    driver.quit()
                except Exception:
                    pass
            return None
        finally:
            # The packed extension file is only needed while the session is
            # being created; remove it on every path.
            if pluginfile:
                try:
                    os.remove(pluginfile)
                except OSError:
                    self.console.exception()
class Meeting:
    """
    * Stores all the meeting data and houses the meeting functions
    """
    def __init__(self, meeting_url):
        """
        * Stores the credentials, wait time and meeting URL used by the
        * other methods, and creates (without starting) the chromedriver
        * service on a randomly chosen port
        """

        super().__init__()

        self.email = "*****@*****.**"
        self.password = "******"
        self.sleep_time = 12
        self.meeting_url = meeting_url

        self.port = random.randint(1024, 65535)
        self.service = Service("chromedriver", port=self.port)

    def init_driver(self):
        """
        * Initialises webdriver and sets options for headless Chrome
        ! Raises KeyError if GOOGLE_CHROME_BIN is not set in the environment
        ! Re-raises any session start error after stopping the service
        """

        options = webdriver.ChromeOptions()
        options.headless = True
        options.binary_location = os.environ["GOOGLE_CHROME_BIN"]
        options.add_argument('--disable-gpu')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')

        self.service.start()
        try:
            self.driver = webdriver.Remote(
                command_executor="http://127.0.0.1:{}".format(str(self.port)),
                options=options,
            )
        except Exception:
            # * Do not leave an orphaned chromedriver process behind when the
            # * browser session could not be created
            self.service.stop()
            raise
        self.driver.implicitly_wait(self.sleep_time)
        self.driver.set_window_size(3840, 2160)

    def try_to_join(self):
        """
        * Opens the meeting URL, follows the "join from browser" link,
        * signs in with the stored credentials and joins under a random
        * 16 character display name (kept in self.random_hash)
        ! Raises a webdriver exception if an expected element is missing
        """

        self.driver.get(self.meeting_url)

        join_from_browser_link = self.driver.find_element_by_xpath(
            "//div[@class='desc24 webclient hideme']//a").get_attribute("href")

        self.driver.get(join_from_browser_link)

        self.driver.find_element_by_id("email").send_keys(self.email)
        self.driver.find_element_by_id("password").send_keys(self.password)

        self.driver.find_element_by_xpath("//div[@class='signin']").click()

        self.random_hash = ''.join(
            random.choices(
                string.ascii_letters + string.digits,
                k=16,
            ))

        self.driver.find_element_by_id("inputname").clear()
        self.driver.find_element_by_id("inputname").send_keys(self.random_hash)
        self.driver.find_element_by_id("joinBtn").click()

    def get_participants_list(self):
        """
        * Opens the participants panel and stores one token list per
        * participant (the whitespace-split aria-label) in
        * self.partipants_list
        ! Exits the process with status 1 if no participant was found
        ! Might raise an exception if the internet is slow
        """

        # * Sleep for a while in order to wait for the partipants list to load
        # ? Any better alternative
        time.sleep(self.sleep_time)

        self.driver.find_elements_by_class_name(
            "footer-button__button.ax-outline")[0].click()

        self.partipants_list = [
            name.get_attribute("aria-label").split() for name in
            self.driver.find_elements_by_class_name("item-pos.participants-li")
        ]

        if not self.partipants_list:
            sys.exit(1)

    def leave_meeting(self):
        """
        * Leaves the ongoing meeting and closes the browser window
        * The browser and the chromedriver service are shut down even if
        * the leave buttons cannot be clicked
        """

        try:
            self.driver.find_element_by_class_name(
                "footer__leave-btn.ax-outline").click()
            self.driver.find_element_by_class_name(
                "zm-btn.zm-btn-legacy.zm-btn--primary.zm-btn__outline--blue"
            ).click()
        finally:
            try:
                self.driver.quit()
            finally:
                self.service.stop()

    def export_data(self):
        """
        * Builds the participants export: one participant name per line,
        * sorted, skipping the entries tagged "(Me)" or "(Host)"
        * Rows in self.partipants_list are trimmed in place to the tokens
        * that precede the device/"audio" part of the aria-label
        * Rows without an "audio" token are kept untouched, so calling this
        * method more than once gives the same result
        """

        for idx, row in enumerate(self.partipants_list):
            if "audio" in row:
                # * Drop "audio", the token right before it and everything
                # * after; clamp at 0 so an early "audio" yields an empty row
                # * instead of a negative slice bound
                cut = max(row.index("audio") - 1, 0)
                self.partipants_list[idx] = row[:cut]

        self.partipants_list.sort()

        lines = [
            " ".join(row) + "\n"
            for row in self.partipants_list
            # * Empty rows carry no name and have no last token to inspect
            if row and row[-1] not in ["(Me)", "(Host)"]
        ]

        return "".join(lines)
Exemple #19
0
class CookiesGenerator(object):
    def __init__(self, website='default'):
        """
        Base class: set up the cookie/account stores and the browser.
        :param website: site name passed to both RedisClient stores
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)
        self.init_browser()

    def __del__(self):
        self.close()

    def init_browser(self):
        """
        Create the browser used for simulated logins, according to the
        module-level BROWSER_TYPE setting ('PhantomJS' or 'Chrome').
        For any other value no browser is created and self.browser is None.
        :return:
        """
        # Always define both attributes so close() can rely on them.
        self.browser = None
        self.driver_service = None
        if BROWSER_TYPE == 'PhantomJS':
            # Copy first: DesiredCapabilities.PHANTOMJS is a shared
            # class-level dict and must not be modified in place.
            caps = DesiredCapabilities.PHANTOMJS.copy()
            caps[
                "phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
            self.browser = webdriver.PhantomJS(desired_capabilities=caps)
            self.browser.set_window_size(1400, 500)
        elif BROWSER_TYPE == 'Chrome':
            # Use the Chrome browser; the service is given the driver path.
            self.driver_service = Service(
                executable_path=chromedriver_path)
            chrome_options = Options()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--no-sandbox')  # this option is important
            self.browser = webdriver.Chrome(chrome_options=chrome_options,
                                            executable_path=chromedriver_path)

    def new_cookies(self, username, password):
        """
        Generate fresh cookies; subclasses must override this.
        :param username: account user name
        :param password: account password
        :return:
        """
        raise NotImplementedError

    def process_cookies(self, cookies):
        """
        Convert a list of cookie records into a name -> value mapping.
        :param cookies: iterable of mappings with 'name' and 'value' keys
        :return: dict mapping each cookie name to its value
        """
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def run(self):
        """
        Fetch all accounts and log in, one after another, with every
        account that has no stored cookies yet.
        :return:
        """
        accounts_usernames = self.accounts_db.usernames()
        cookies_usernames = self.cookies_db.usernames()

        for username in accounts_usernames:
            if username not in cookies_usernames:
                password = self.accounts_db.get(username)
                print('正在生成Cookies', '账号', username, '密码', password)
                result = self.new_cookies(username, password)
                # Status 1: cookies obtained successfully
                if result.get('status') == 1:
                    cookies = self.process_cookies(result.get('content'))
                    print('成功获取到Cookies', cookies)
                    if self.cookies_db.set(username, json.dumps(cookies)):
                        print('成功保存Cookies')
                # Status 2: wrong password, remove the account
                elif result.get('status') == 2:
                    print(result.get('content'))
                    if self.accounts_db.delete(username):
                        print('成功删除账号')
                else:
                    print(result.get('content'))
        else:
            # The loop never breaks, so this runs after every pass.
            print('所有账号都已经成功获取Cookies')

    def close(self):
        """
        Shut down the browser and the driver service, if any.
        Safe to call when no browser was created and safe to call twice
        (it is also invoked from __del__).
        :return:
        """
        print('Closing Browser')
        browser = getattr(self, 'browser', None)
        service = getattr(self, 'driver_service', None)
        try:
            if browser is None:
                print('Browser not opened')
            else:
                # quit() ends the whole session; close() would only close
                # the current window and leave the driver process running.
                browser.quit()
        except TypeError:
            print('Browser not opened')
        finally:
            self.browser = None
            if service is not None:
                self.driver_service = None
                service.stop()
Exemple #20
0
class SeleniumDriverDispatcher:
    """Thin facade that forwards GUI automation calls to DriverCommands
    for a single Selenium driver, and owns the optional local driver
    service and proxy created in launch()."""

    def __init__(self):
        self.__config = None
        self.__driver = None
        self.__proxy_server = None
        self.__proxy = None
        self.__driver_service = None

    def __create_gui_element_dispatcher(self, element):
        """Wrap a driver element in an element dispatcher bound to this object."""
        return SeleniumDriverElementDispatcher.create_dispatcher(self, element)

    @property
    def driver(self):
        """The underlying driver, or None before launch()."""
        return self.__driver

    @property
    def proxy(self):
        """The proxy created in launch(), or None when there is none."""
        return self.__proxy

    def launch(self, config):
        """Start the browser described by config["arjuna_options"].

        When SELENIUM_SERVICE_URL is "not_set" (case-insensitive) a local
        driver service is started for BROWSER_NAME (Chrome or Firefox),
        optionally downloading the driver first when
        SELENIUM_DRIVER_DOWNLOAD is truthy. Otherwise the configured URL
        is used, with "/wd/hub" appended if missing.

        Raises:
            Exception: if a local service is requested for a browser other
                than Chrome or Firefox.
        """
        self.__config = config
        from .browser_launcher import BrowserLauncher

        svc_url = config["arjuna_options"]["SELENIUM_SERVICE_URL"]
        driver_download = config["arjuna_options"]["SELENIUM_DRIVER_DOWNLOAD"]
        browser_name = config["arjuna_options"]["BROWSER_NAME"]
        driver_path = config["arjuna_options"]["SELENIUM_DRIVER_PATH"]
        if svc_url.lower() == "not_set":
            from arjuna.tpi.constant import BrowserName
            driver_service = None
            driver_downloader = None
            if browser_name == BrowserName.CHROME:
                from selenium.webdriver.chrome.service import Service
                driver_service = Service
                from webdriver_manager.chrome import ChromeDriverManager
                driver_downloader = ChromeDriverManager
            elif browser_name == BrowserName.FIREFOX:
                from selenium.webdriver.firefox.service import Service
                driver_service = Service
                from webdriver_manager.firefox import GeckoDriverManager
                driver_downloader = GeckoDriverManager
            else:
                # Fail with a clear message instead of an UnboundLocalError
                # on the service class or a call on None for the downloader.
                raise Exception(
                    "A local Selenium service is not supported for browser: {}".format(browser_name)
                )
            if driver_download:
                driver_path = driver_downloader().install()
            # Use the class selected above for this browser.
            self.__driver_service = driver_service(driver_path)
            self.__driver_service.start()
            svc_url = self.__driver_service.service_url
        else:
            if not svc_url.lower().endswith("/wd/hub"):
                svc_url += "/wd/hub"

        # BrowserMob
        from arjuna import Arjuna
        bmproxy_server = Arjuna._get_bmproxy_server()
        if bmproxy_server is not None:
            self.__proxy = bmproxy_server.create_proxy()

        self.__driver = BrowserLauncher.launch(config, svc_url=svc_url, proxy=self.__proxy)

    def quit(self):
        """Quit the driver, then stop the local service and close the proxy.

        The service and proxy are released even if quitting the driver raises.
        """
        try:
            DriverCommands.quit(self.__driver)
        finally:
            try:
                if self.__driver_service:
                    self.__driver_service.stop()
            finally:
                if self.__proxy:
                    self.__proxy.close()

    def go_to_url(self, url):
        """Delegate to DriverCommands.go_to_url."""
        DriverCommands.go_to_url(self.__driver, url)

    def go_back_in_browser(self):
        """Delegate to DriverCommands.go_back_in_browser."""
        DriverCommands.go_back_in_browser(self.__driver)

    def go_forward_in_browser(self):
        """Delegate to DriverCommands.go_forward_in_browser."""
        DriverCommands.go_forward_in_browser(self.__driver)

    def refresh_browser(self):
        """Delegate to DriverCommands.refresh_browser."""
        DriverCommands.refresh_browser(self.__driver)

    def get_source(self):
        """Return the result of DriverCommands.get_source."""
        return DriverCommands.get_source(self.__driver)

    def send_keys(self, key_str):
        """Delegate to DriverCommands.send_keys."""
        DriverCommands.send_keys(self.__driver, key_str)

    def execute_javascript(self, script, *args):
        """Run script through DriverCommands.execute_javascript.

        Element dispatchers among args are replaced by their underlying
        driver element; every other argument is passed through unchanged.
        """
        js_args = [
            arg.driver_element if isinstance(arg, SeleniumDriverElementDispatcher) else arg
            for arg in args
        ]
        return DriverCommands.execute_javascript(self.__driver, script, *js_args)

    def take_screenshot(self, file_path):
        """Delegate to DriverCommands.take_screenshot."""
        DriverCommands.take_screenshot(self.__driver, file_path)

    def take_screenshot_as_base64(self):
        """Return the result of DriverCommands.take_screenshot_as_base64."""
        return DriverCommands.take_screenshot_as_base64(self.__driver)

    def find_element(self, with_type, with_value, *, relations=None, filters=None):
        """Find one element; return (1, element dispatcher)."""
        element = SeleniumElementFinder.find_element(self.__driver, with_type, with_value, relations=relations, filters=filters)
        return 1, self.__create_gui_element_dispatcher(element)

    def __process_single_js_element(self, element):
        """Validate one JavaScript result and return it.

        Raises:
            Exception: if the result is None or is not a WebElement.
        """
        # JS returns null, undefined
        if element is None:
            raise Exception("JavaScript could not find element.")
        if not isinstance(element, WebElement):
            raise Exception("JavaScript returned a non-element object.")
        return element

    def __process_js_element_list(self, elements):
        """Validate every entry of a non-empty JavaScript result list."""
        if not elements:
            raise Exception("JavaScript could not find element.")
        return [self.__process_single_js_element(e) for e in elements]

    def __process_js_element(self, element):
        """Reduce a JavaScript result to one validated element (first of a list)."""
        if isinstance(element, list):
            return self.__process_js_element_list(element)[0]
        return self.__process_single_js_element(element)

    def __process_js_multielement(self, elements):
        """Normalise a JavaScript result to a list of validated elements."""
        if isinstance(elements, list):
            return self.__process_js_element_list(elements)
        return [self.__process_single_js_element(elements)]

    def find_element_with_js(self, js):
        """Find one element with JavaScript; return (1, element dispatcher)."""
        element = self.execute_javascript(js)
        element = self.__process_js_element(element)
        return 1, self.__create_gui_element_dispatcher(element)

    def find_multielement(self, with_type, with_value, *, relations=None, filters=None):
        """Find all matching elements; return (size, MultiElement)."""
        web_elements = SeleniumElementFinder.find_elements(self.__driver, with_type, with_value, relations=relations, filters=filters)
        melement = MultiElement([SeleniumDriverElementDispatcher(self, web_element) for web_element in web_elements])
        return melement.get_size(), melement

    def find_multielement_with_js(self, js):
        """Find elements with JavaScript; return (size, MultiElement)."""
        web_elements = self.execute_javascript(js)
        web_elements = self.__process_js_multielement(web_elements)
        melement = MultiElement([SeleniumDriverElementDispatcher(self, web_element) for web_element in web_elements])
        return melement.get_size(), melement

    def get_current_window_handle(self):
        """Return the result of DriverCommands.get_current_window_handle."""
        return DriverCommands.get_current_window_handle(self.__driver)

    def get_current_window_title(self):
        """Return the result of DriverCommands.get_window_title."""
        return DriverCommands.get_window_title(self.__driver)

    def maximize_current_window(self):
        """Delegate to DriverCommands.maximize_window."""
        DriverCommands.maximize_window(self.__driver)

    def get_current_window_size(self):
        """Return the window size as {"width": ..., "height": ...}."""
        res = DriverCommands.get_current_window_size(self.__driver)
        return {"width" : res[0], "height" : res[1]}

    def get_all_window_handles(self):
        """Return the result of DriverCommands.get_all_winodw_handles."""
        # The helper name is misspelled where it is defined; keep it as is.
        return DriverCommands.get_all_winodw_handles(self.__driver)

    def focus_on_window(self, handle):
        """Delegate to DriverCommands.focus_on_window."""
        DriverCommands.focus_on_window(self.__driver, handle)

    def close_current_window(self):
        """Delegate to DriverCommands.close_current_window."""
        DriverCommands.close_current_window(self.__driver)

    def is_web_alert_present(self):
        """Return the result of DriverCommands.is_web_alert_present."""
        return DriverCommands.is_web_alert_present(self.__driver)

    def confirm_web_alert(self):
        """Delegate to DriverCommands.confirm_web_alert."""
        DriverCommands.confirm_web_alert(self.__driver)

    def dismiss_web_alert(self):
        """Delegate to DriverCommands.dismiss_web_alert."""
        DriverCommands.dismiss_web_alert(self.__driver)

    def get_text_from_web_alert(self):
        """Return the result of DriverCommands.get_text_from_web_alert."""
        return DriverCommands.get_text_from_web_alert(self.__driver)

    def send_text_to_web_alert(self, text):
        """Delegate to DriverCommands.send_text_to_web_alert."""
        DriverCommands.send_text_to_web_alert(self.__driver, text)

    def focus_on_frame(self, element_dispatcher):
        """Delegate to DriverCommands.focus_on_frame with the wrapped element."""
        DriverCommands.focus_on_frame(self.__driver, element_dispatcher.driver_element)

    def get_element_for_setu_id(self, id):
        """Look up a stored driver element by id."""
        # NOTE(review): __driver_elements is never assigned anywhere in this
        # class as visible here, so this raises AttributeError - confirm
        # whether the method is still used.
        return self.__driver_elements[id]

    def focus_on_parent_frame(self):
        """Delegate to DriverCommands.focus_on_parent_frame."""
        DriverCommands.focus_on_parent_frame(self.__driver)

    def focus_on_dom_root(self):
        """Delegate to DriverCommands.focus_on_dom_root."""
        DriverCommands.focus_on_dom_root(self.__driver)

    def perform_action_chain(self, action_chain):
        """Delegate to DriverCommands.perform_action_chain (also passes self)."""
        DriverCommands.perform_action_chain(self, self.__driver, action_chain)

    def hover_on_element(self, element_dispatcher):
        """Delegate to DriverCommands.hover_on_element with the wrapped element."""
        DriverCommands.hover_on_element(self.__driver, element_dispatcher.driver_element)

    def mouse_click_on_element(self, element_dispatcher):
        """Delegate to DriverCommands.mouse_click_on_element with the wrapped element."""
        DriverCommands.mouse_click_on_element(self.__driver, element_dispatcher.driver_element)

    def double_click_on_element(self, element_dispatcher):
        """Delegate to DriverCommands.double_click_on_element with the wrapped element."""
        DriverCommands.double_click_on_element(self.__driver, element_dispatcher.driver_element)

    def scroll_to_element(self, element_dispatcher):
        """Delegate to DriverCommands.scroll_to_element with the wrapped element."""
        DriverCommands.scroll_to_element(self.__driver, element_dispatcher.driver_element)
                browser.quit()
                continue

            # random body temperature
            input_fields = browser.find_elements_by_tag_name('input')
            for i in input_fields:
                if i.get_attribute("name").find("DZ_JSDTCJTW") >= 0:
                    # scroll down until body temperature textfield is visible
                    browser.execute_script("arguments[0].scrollIntoView();", i)

                    i.click()
                    i.send_keys(str(random.randint(362, 370) / 10.0))
                    break
            time.sleep(10)

            # save
            save_btn = browser.find_element_by_css_selector(
                "div#save.bh-btn.bh-btn-primary")
            time.sleep(5)
            save_btn.click()
            time.sleep(5)

            # confirm
            confirm_btn = browser.find_element_by_css_selector(
                "a.bh-dialog-btn.bh-bg-primary.bh-color-primary-5")
            confirm_btn.click()
            time.sleep(5)
            browser.quit()
            print("Successfully fill in!")
driver_service.stop()
Exemple #22
0
        return True


# Per-account settings kept as parallel lists: entry i of each list belongs
# to the same account.
username = ['201xxx', '201xxx']
password = ['xxx', 'xxx']
province = ['河南省', '河南省']
city = ['郑州市', '郑州市']
# NOTE(review): receiver is not used in the visible part of the script -
# presumably the notification address per account; confirm further down.
receiver = ['@qq.com', '@163.com']

# Call dk() once per account and collect the return values in res.
num = len(username)
res = []
for i in range(num):
    res.append(dk(username[i], password[i], province[i], city[i]))

# Shut down the browser and driver service, then force-kill any leftover
# chromedriver/chrome processes by image name (Windows taskkill).
driver.quit()
service.stop()
os.system('taskkill /im chromedriver.exe /F')
os.system('taskkill /im chrome.exe /F')

# sender_username is the sender's mail account
sender_username = '******'
# pwd is the mailbox authorization code
pwd = 'xxx'

# Content for the mail body
fenxiang_img = get_jsciba()
djt = get_djt()
mail_content = f'''
            <p>{today_md}打卡完成</p>
            <img src="{fenxiang_img}">
            <p>{djt}</p>
Exemple #23
0
class Chrome(WebDriver):
    """
    WebDriver that starts a ChromeDriver service and drives the browser
    through it.

    The ChromeDriver executable has to be downloaded separately from
    http://chromedriver.storage.googleapis.com/index.html
    """
    def __init__(self,
                 executable_path="chromedriver",
                 port=0,
                 options=None,
                 service_args=None,
                 desired_capabilities=None,
                 service_log_path=None,
                 chrome_options=None,
                 keep_alive=True):
        """
        Start the ChromeDriver service, then open a driver session on it.

        :Args:
         - executable_path - path to the executable. With the default value
           the executable is expected to be on the $PATH
         - port - port the service should run on; with 0 a free port is found
         - options - an instance of ChromeOptions
         - service_args - list of args to pass to the driver service
         - desired_capabilities - dictionary with non-browser specific
           capabilities only, such as "proxy" or "loggingPref"
         - service_log_path - where to log information from the driver
         - chrome_options - deprecated alias of options
         - keep_alive - whether ChromeRemoteConnection uses HTTP keep-alive
        """
        if chrome_options:
            warnings.warn('use options instead of chrome_options',
                          DeprecationWarning,
                          stacklevel=2)
            options = chrome_options

        if options is not None:
            # Capabilities derived from the options take precedence over
            # equally named entries passed in by the caller.
            option_caps = options.to_capabilities()
            if desired_capabilities is None:
                desired_capabilities = option_caps
            else:
                desired_capabilities.update(option_caps)
        elif desired_capabilities is None:
            # Neither options nor capabilities given: use the defaults.
            desired_capabilities = self.create_options().to_capabilities()

        self.service = Service(executable_path,
                               port=port,
                               service_args=service_args,
                               log_path=service_log_path)
        self.service.start()

        try:
            WebDriver.__init__(
                self,
                command_executor=ChromeRemoteConnection(
                    remote_server_addr=self.service.service_url,
                    keep_alive=keep_alive),
                desired_capabilities=desired_capabilities)
        except Exception:
            # The session could not be created: stop the service again
            # before reporting the error.
            self.quit()
            raise
        self._is_remote = False

    def launch_app(self, id):
        """Launches the Chrome app with the given id."""
        payload = {'id': id}
        return self.execute("launchApp", payload)

    def get_network_conditions(self):
        """
        Gets the Chrome network emulation settings.

        :Returns:
            A dict. For example:

            {'latency': 4, 'download_throughput': 2, 'upload_throughput': 2,
            'offline': False}

        """
        response = self.execute("getNetworkConditions")
        return response['value']

    def set_network_conditions(self, **network_conditions):
        """
        Sets the Chrome network emulation settings.

        :Args:
         - network_conditions: A dict with conditions specification.

        :Usage:
            driver.set_network_conditions(
                offline=False,
                latency=5,  # additional latency (ms)
                download_throughput=500 * 1024,  # maximal throughput
                upload_throughput=500 * 1024)  # maximal throughput

            Note: 'throughput' can be used to set both (for download and upload).
        """
        payload = {'network_conditions': network_conditions}
        self.execute("setNetworkConditions", payload)

    def execute_cdp_cmd(self, cmd, cmd_args):
        """
        Executes a Chrome Devtools Protocol command and returns its result.

        Command names and arguments follow the chrome devtools protocol
        domains/commands, see
        https://chromedevtools.github.io/devtools-protocol/

        :Args:
         - cmd: A str, command name
         - cmd_args: A dict, command args. empty dict {} if there is no command args

        :Usage:
            driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId})

        :Returns:
            A dict, empty dict {} if there is no result to return.
            For example to getResponseBody:

            {'base64Encoded': False, 'body': 'response body string'}

        """
        payload = {'cmd': cmd, 'params': cmd_args}
        response = self.execute("executeCdpCommand", payload)
        return response['value']

    def quit(self):
        """
        Closes the browser and shuts down the ChromeDriver executable
        that was started together with this driver.
        """
        try:
            WebDriver.quit(self)
        except Exception:
            # Deliberately ignored: if quitting the session fails something
            # has probably gone wrong already, and the service is stopped
            # below regardless.
            pass
        finally:
            self.service.stop()

    @staticmethod
    def create_options():
        """Returns a fresh default Options instance."""
        return Options()
Exemple #24
0
def register(country_code, email, password):
    """
    Open ais.usvisa-info.com on a randomly chosen Selenium node, reach the
    "Pay Visa Fee" page and collect the appointment dates listed there.

    If email is present in the module-level cache, the cached session is
    reused (after change_region); otherwise a sign-in with email/password
    is performed. On success with a non-empty result the cache entry is
    refreshed; on an empty result or on any error it is removed.

    :param country_code: country path segment used in the site URLs
    :param email: account e-mail, also the cache key
    :param password: account password
    :return: (result, session, schedule_id) where result is a list of
             [place, (year, month, day)] entries, or (None, None, None)
             if anything failed
    """
    global cache
    # Defined up front so the cleanup below never sees an unbound name when
    # the failure happens before they are created.
    driver = None
    c_service = None
    try:
        # Login
        c_service = Service("/usr/bin/chromedriver")
        c_service.command_line_args()
        c_service.start()
        with open("node.txt", "r") as node_file:
            selenium_list = [x.strip() for x in node_file]
        entry = random.choice(selenium_list)
        driver = webdriver.Remote(
            command_executor='http://%s:4444/wd/hub' % entry,
            desired_capabilities=chrome_options.to_capabilities())
        print("Choose Node:", entry)

        if email in cache:
            session, schedule_id, group_id = cache[email]
            new_session = change_region(country_code, session, group_id)
            # A page of the domain must be loaded before its cookie is set.
            driver.get("https://ais.usvisa-info.com")
            driver.add_cookie({
                'name': '_yatri_session',
                'value': new_session,
                'path': '/',
                'domain': 'ais.usvisa-info.com',
                'secure': True
            })
            driver.get("https://ais.usvisa-info.com/%s/niv/groups/%s" %
                       (country_code, group_id))
        else:
            driver.get("https://ais.usvisa-info.com/%s/niv/users/sign_in" %
                       country_code)
            email_box = driver.find_element_by_id("user_email")
            email_box.clear()
            email_box.send_keys(email)
            password_box = driver.find_element_by_id("user_password")
            password_box.clear()
            password_box.send_keys(password)
            driver.execute_script(
                "document.getElementById('policy_confirmed').click()")
            signin_button = driver.find_element_by_name("commit")
            signin_button.click()

        def wait_loading(xpath, option="locate"):
            """Wait until xpath is present ("locate") or clickable
            ("clickable"); on timeout log, scroll and save a screenshot."""
            if option == "locate":
                element_present = EC.presence_of_element_located(
                    (By.XPATH, xpath))
            elif option == "clickable":
                element_present = EC.element_to_be_clickable(
                    (By.XPATH, xpath))
            else:
                raise ValueError("unknown wait option: %s" % option)
            try:
                WebDriverWait(driver, wait_timeout).until(element_present)
            except TimeoutException:
                print("Timed out waiting for page to load")
                driver.execute_script("window.scrollTo(0, 1080)")
                driver.save_screenshot("test.png")

        # Continue
        continue_button_xpath = "//a[contains(text(), 'Continue')]"
        wait_loading(continue_button_xpath)
        current_url = driver.current_url
        group_id = current_url.split("/")[-1]
        continue_button = driver.find_element_by_xpath(continue_button_xpath)
        continue_button.click()

        # Choose action
        pay_button_xpath = "//a[contains(text(), 'Pay Visa Fee')]"
        wait_loading(pay_button_xpath)
        banner = driver.find_element_by_tag_name('h5')
        banner.click()
        wait_loading(pay_button_xpath, option="clickable")
        pay_button = driver.find_element_by_xpath(pay_button_xpath)
        pay_button.click()

        # Collect result
        title_xpath = "//h2[contains(text(), 'MRV Fee Details')]"
        wait_loading(title_xpath)
        time_table = driver.find_element_by_class_name('for-layout')
        result = []
        for tr in time_table.find_elements_by_tag_name('tr'):
            tds = tr.find_elements_by_tag_name('td')
            if len(tds) != 2:
                continue
            place = tds[0].text
            date_str = tds[1].text
            s = date_str.split()
            # (0, 0, 0) marks a place without an available date.
            year, month, day = 0, 0, 0
            if len(s) >= 3 and s[0] != "No":
                day_str, month_str, year_str = s[-3], s[-2].replace(
                    ",", ""), s[-1]
                year, month, day = int(year_str), g.MONTH[month_str], int(
                    day_str)
            result.append([place, (year, month, day)])

        current_url = driver.current_url
        schedule_id = current_url.split("/")[-2]
        session = driver.get_cookie("_yatri_session")["value"]
        if result:
            cache[email] = [session, schedule_id, group_id]
        else:
            # pop() instead of del: the e-mail may not be cached yet, and an
            # empty result must not be turned into an error by a KeyError.
            cache.pop(email, None)
        return result, session, schedule_id
    except Exception as e:
        cache.pop(email, None)
        print(str(e))
        return None, None, None
    finally:
        # Single cleanup point: runs exactly once on success and on failure.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                print(str(e))
        if c_service is not None:
            c_service.stop()
Exemple #25
0
def donghangcrawler(city1, city2, date_in):
    """
    Search www.ceair.com (China Eastern) for flights and scrape the result
    page.

    :param city1: departure city typed into the search form
    :param city2: arrival city typed into the search form
    :param date_in: departure date as a 'YYYYMMDD' string
    :return: list of dicts with the keys 'Airline', 'FlightNumber',
             'dTime', 'dAirport', 'aTime', 'aAirport' and 'LowestPrice';
             empty when the result page shows no flight or price blocks
    """
    print("东方航空爬虫开始运行")
    infolist = []
    pricelist = []
    resultlist = []
    date_in = date_in[0:4] + '-' + date_in[4:6] + '-' + date_in[6:8]
    c_service = Service('./webdriver/chromedriver.exe')
    c_service.command_line_args()
    browser = webdriver.Chrome(executable_path='./webdriver/chromedriver.exe')
    try:
        c_service.start()
        browser.get('http://www.ceair.com/')
        ad = browser.find_element_by_id("appd_wrap_close")
        citya = browser.find_element_by_id("label_ID_0")  # departure city
        cityb = browser.find_element_by_id("label_ID_1")  # arrival city
        date = browser.find_element_by_id("depDt")
        datex = browser.find_element_by_id("deptDtRt")
        search = browser.find_element_by_id("btn_flight_search")  # search button
        # Wait for the page to finish loading before interacting, otherwise
        # the site treats the session as a robot.
        time.sleep(0.5)
        ad.click()  # close the ad, it covers the search form
        time.sleep(0.5)
        citya.clear()  # remove the default input
        time.sleep(0.5)
        citya.send_keys(city1)
        time.sleep(0.5)
        citya.send_keys(Keys.TAB)
        cityb.send_keys(city2)
        time.sleep(0.5)
        cityb.send_keys(Keys.TAB)
        time.sleep(0.5)
        # Erase the default value with backspaces and type the new date;
        # clear() cannot be used on this field.
        date.send_keys(Keys.BACKSPACE, Keys.BACKSPACE, Keys.BACKSPACE,
                       Keys.BACKSPACE, Keys.BACKSPACE, Keys.BACKSPACE,
                       Keys.BACKSPACE, Keys.BACKSPACE, Keys.BACKSPACE,
                       Keys.BACKSPACE, date_in)
        time.sleep(0.5)
        date.send_keys(Keys.TAB)  # dismiss the popup menu
        time.sleep(0.5)
        datex.send_keys(Keys.TAB)  # dismiss the calendar menu
        time.sleep(0.5)
        search.click()
        time.sleep(3)
        try:
            # Switch to the result page opened in a second window.
            browser.switch_to.window(browser.window_handles[1])
        except Exception:
            # The second window did not open: click search again, once.
            time.sleep(3)
            search.click()
            time.sleep(3)
            browser.switch_to.window(browser.window_handles[1])

        time.sleep(5)
        info = browser.find_elements_by_xpath(
            "//section[@class='summary']")  # flight info blocks
        price = browser.find_elements_by_xpath(
            "//dd[@data-type='economy']")  # price blocks
        # find_elements_* returns a (possibly empty) list, never None.
        if not info or not price:
            print("----东航--nodata----")
            return resultlist
        for i in info:
            # Tokens of the flight summary, e.g.
            # ['东方航空', '|', 'MU5104|直达|', '09:00', '首都国际机场', 'T2', '直达', '11:15', '虹桥国际机场', 'T2', '02小时15分钟']
            clean = str(i.text).strip().split()
            clean.remove('|')
            clean[1] = clean[1].split('|')[0]  # extract the flight number
            infolist.append(clean)
        for j in price:
            clean = str(j.text)
            if clean:
                clean = clean.split()[1]
            else:
                # Empty price text: reported as sold out.
                clean = '售完'
            pricelist.append(clean)
        for info_, price_ in zip(infolist, pricelist):
            resultlist.append({
                'Airline': info_[0],
                'FlightNumber': info_[1],
                'dTime': info_[2],
                'dAirport': info_[3],
                'aTime': info_[6],
                'aAirport': info_[7],
                'LowestPrice': price_,
            })
    finally:
        # Always release the browser and the driver service, including on
        # the early return above and on any scraping error.
        try:
            browser.quit()
        finally:
            c_service.stop()
    # Enable the block below when running the whole project:
    #    with open('./data/donghang.csv', 'w', encoding='utf-8') as csvfile:
    #        writer = csv.writer(csvfile, delimiter=',')
    #        for i in resultlist:
    #            writer.writerow([city1, city2, i.get("Airline"), i.get('FlightNumber'), i.get('dAirport'),
    #                             i.get('aAirport'), i.get('dTime'), i.get('aTime'), i.get('LowestPrice'), '东方航空'])
    print("东方航空爬虫运行结束")
    return resultlist