Example #1
wd = dr.window_handles  # all window handles
dr.switch_to.window(wd[-1])  # switch to the newest handle

# Handling frames
dr.switch_to.frame('')  # switch into an embedded frame (pass its name or id)
dr.switch_to.default_content()  # back to the top-level page
dr.switch_to.parent_frame()  # switch up to the parent frame

# Waits
from selenium import webdriver
from time import sleep
import selenium.webdriver.support.ui as ui

dr = webdriver.Firefox()
dr.get('http://www.moore.ren/')
# dr.maximize_window()
# hard wait: sleep()
sleep(2)

# Smart wait bound to driver dr (if the element is already displayed there is no waiting; otherwise it polls until it shows up)
wait = ui.WebDriverWait(dr, 10)  # maximum wait: if nothing is displayed after 10 seconds, a timeout error is raised
un = wait.until(lambda dr: dr.find_element_by_xpath(
    '/html/body/div[2]/div/div[2]/div[6]/div/a/img').is_displayed())
# is_displayed checks whether the element is shown on screen
# is_enabled checks whether the element is greyed out (disabled)

print(
    dr.find_element_by_xpath(
        '/html/body/div[2]/div/div[2]/div[6]/div/a/img').is_displayed())

dr.quit()
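
The lambda wait above can also be written with the expected_conditions helpers that ship with Selenium; visibility_of_element_located folds the presence check and is_displayed() into one condition. A minimal sketch of the equivalent wait, reusing the same site and XPath:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

dr = webdriver.Firefox()
dr.get('http://www.moore.ren/')

# raises TimeoutException after 10 seconds if the image never becomes visible
img = WebDriverWait(dr, 10).until(
    EC.visibility_of_element_located(
        (By.XPATH, '/html/body/div[2]/div/div[2]/div[6]/div/a/img')))
print(img.is_displayed())  # True once the wait has succeeded

dr.quit()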
Example #2
# _*_ coding: utf-8 _*_
from selenium import webdriver
import selenium.webdriver.support.ui as ui
import time

print('----------------SYSTEM LOADING, please wait........')
SUMRESOURCES = 0  # global variable
driver_item = webdriver.PhantomJS(
    executable_path=
    '/Users/sallyfan/downloads/phantomjs-2.1.1-macosx/bin/phantomjs')
driver_detail = webdriver.Chrome(
    executable_path='/Users/sallyfan/downloads/chromedriver')
url = "https://movie.douban.com/explore#!type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=0"
# waits used for page loading
wait1 = ui.WebDriverWait(driver_item, 15)
wait = ui.WebDriverWait(driver_detail, 15)


# fetch the URL and article title
def getURL_Title():
    global SUMRESOURCES

    # ask the user what to fetch: genre, sort order, how many to view

    print('please select:')
    kind = input(
        "1-Hot\n2-Newest\n3-Classics\n4-Playable\n5-High Scores\n6-Wonderful but not popular\n7-Chinese film\n8-Hollywood\n9-Korea\n10-Japan\n11-Action movies\n12-Comedy\n13-Love story\n14-Science fiction\n15-Thriller\n16-Horror film\n17-Cartoon\nplease select:"
    )
    print("-----------------------------------------------")
    sort = input(
        "1-Sort by hot\n2-Sort by time\n2-Sort by score\nplease select:")
Example #3
    def get_music(self):
        while True:
            if self.option_driver is False:
                self.driver_firefox()
                self.option_driver = True
                if self.driver is None:
                    self.option_driver = False
                    continue
            user_id = self.music_task.get()
            userId = user_id.strip()
            print('Fetching songs for user ID %s ...' % userId)
            try:
                self.driver.get(
                    "http://music.163.com/user/songs/rank?id=%s" % userId
                )  # user page to scrape; note this id is not the user's id but the id of the user's all-time listening ranking
                self.driver.switch_to.frame(
                    'g_iframe')  # switch from the window into the frame that holds the song list
            except:
                self.option_driver = False
                continue
            try:
                time.sleep(1)
                wait = ui.WebDriverWait(self.driver, 60)
                # find the parent tag that holds the song list
                if wait.until(lambda driver: driver.find_element_by_class_name(
                        'g-bd')):
                    WebDriverWait(self.driver, 10).until(
                        EC.presence_of_element_located(
                            (By.CLASS_NAME, "z-sel")))
                    if self.driver.find_element_by_class_name(
                            'z-sel').text == '所有时间':  # "all time" tab selected
                        soup = BeautifulSoup(self.driver.page_source,
                                             features='lxml')
                        listen_num, all_time = self.music_info(soup)
                        week_time = ''
                    else:
                        soup = BeautifulSoup(self.driver.page_source,
                                             features='lxml')
                        listen_num, week_time = self.music_info(soup)
                        self.driver.find_element_by_id('songsall').click()
                        time.sleep(1)
                        soup = BeautifulSoup(self.driver.page_source,
                                             features='lxml')
                        listen_num, all_time = self.music_info(soup)
                else:
                    all_time, week_time, listen_num = '', '', '0'

                result = {
                    'userId': user_id,
                    'all_music': all_time,
                    'week_music': week_time,
                    'listen_num': listen_num
                }
                self.user_result_queue.put(result)
            except Exception as e:
                print(e)
                try:
                    soup = BeautifulSoup(self.driver.page_source,
                                         features='lxml')
                    listen_num = soup.find(
                        'div',
                        attrs={
                            'class': 'u-title u-title-1 f-cb m-record-title'
                        }).find('h4').string
                    listen_num = re.findall(r'累积听歌(.*?)首',  # pattern: "listened to N songs in total"
                                            listen_num)[0].strip()
                except:
                    listen_num = '0'
                result = {
                    'userId': user_id,
                    'all_music': '',
                    'week_music': '',
                    'listen_num': listen_num
                }
                self.user_result_queue.put(result)
Example #4
def get_kindle_text(driver_type, book_title, first_page, last_page):
    if driver_type == 'chrome':
        path_to_chromedriver = '/Users/rwest/Downloads/chromedriver'  # change path as needed
        driver = webdriver.Chrome(executable_path=path_to_chromedriver)
    elif driver_type == 'phantomjs':
        # phantom js settings
        dcap = dict(webdriver.DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/45.0.2454.99")

        #path_to_phantomjs = '/Users/rwest/Desktop/phantomjs' # change path as needed
        path_to_phantomjs = 'phantomjs'
        driver = webdriver.PhantomJS(executable_path=path_to_phantomjs,
                                     desired_capabilities=dcap)


    url = 'https://www.amazon.com/ap/signin?openid.assoc_handle=amzn_kweb&openid.'\
          'return_to=https%3A%2F%2Fread.amazon.com%2F&openid.mode='\
          'checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.'\
          'net%2Fauth%2F2.0&openid.identity=http%3A%2F%2Fspecs.openid.'\
          'net%2Fauth%2F2.0%2Fidentifier_select&openid.claimed_id='\
          'http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&'\
          'pageId=amzn_kcr'
    driver.get(url)

    # Fill out login form and submit
    email = driver.find_element_by_id("ap_email")
    password = driver.find_element_by_id("ap_password")
    email.send_keys("*****@*****.**")
    with open('kindle_credentials.txt', 'r') as file:
        password_str = file.read().strip()  # read as text and drop the trailing newline
    password.send_keys(password_str)
    driver.find_element_by_id("signInSubmit-input").click()

    # Wait until logged in then switch to KindleLibraryIFrame
    wait = ui.WebDriverWait(driver, 10)
    iFrame = wait.until(
        lambda driver: driver.find_element_by_id('KindleLibraryIFrame'))
    driver.switch_to.frame(iFrame)

    # close pop up message to use offline reader
    driver.find_element_by_id('kindle_dialog_firstRun_button').click()

    # select chosen book
    books = wait.until(
        lambda driver: driver.find_elements_by_class_name('book_title'))
    #books = driver.find_elements_by_class_name('book_title')
    for book in books:
        print(book.text)
        if book.text == book_title:
            book.click()

    # wait until book opened then switch to KindleReaderIFrame
    driver.switch_to.default_content()
    iFrame2 = wait.until(
        lambda driver: driver.find_element_by_id('KindleReaderIFrame'))
    driver.switch_to.frame(iFrame2)

    # select first page of book
    page_number = '2'

    def copy_page(page_number):
        page_selector = wait.until(lambda driver: driver.find_element_by_id(
            'kindleReader_button_goto'))
        wait.until(lambda driver: driver.find_element_by_id(
            'kindleReader_button_goto'))

        hover = ActionChains(driver).move_to_element(
            page_selector)  # make button visible

        condition = ''
        while (condition == ''):
            try:
                page_selector.click()
            except ElementNotVisibleException:
                hover = ActionChains(driver).move_to_element(
                    page_selector)  # make button visible
            else:
                condition = 'passed'

        condition = ''
        while (condition == ''):
            try:
                driver.find_element_by_id(
                    'kindleReader_goToMenuItem_goToLocation').click()
            except NoSuchElementException:
                page_selector.click()
            else:
                condition = 'passed'

        enter_page_num = wait.until(lambda driver: driver.find_element_by_id(
            "kindleReader_dialog_gotoField"))
        enter_page_num.send_keys(page_number)
        buttons = driver.find_elements_by_class_name("ui-button")
        for b in buttons:
            if b.text == 'Go to location':
                b.click()

        time.sleep(7)
        driver.get_screenshot_as_file('temp.png')
        text = get_page_text('temp.png')
        os.remove('temp.png')

        return text

    results = []
    for i in range(first_page, last_page + 1):
        text = copy_page(page_number=str(i))
        results.append(text)

    print(results)

Example #5
def daily_task():
    global DATE
    DATE = str(datetime.date.today())
    chromeOptions = webdriver.ChromeOptions()
    prefs = {"profile.managed_default_content_settings.images":2}
    chromeOptions.add_argument("--headless")
    chromeOptions.add_experimental_option("prefs",prefs)
    browser2 = webdriver.Chrome(options=chromeOptions, executable_path=CHROME_DRIVER_PATH)
    browser = webdriver.Chrome(options=chromeOptions, executable_path=CHROME_DRIVER_PATH)
    # browser2 = webdriver.Chrome(chrome_options=chromeOptions)
    # browser = webdriver.Chrome(chrome_options=chromeOptions)
    # browser = webdriver.Chrome()
    browser.set_window_position(400, 40)
    browser.set_window_size(1300, 1024)
    wait = ui.WebDriverWait(browser,60)
    wait2 = ui.WebDriverWait(browser,10)
    browser.get(BASE_URL)
    urls = []
    write_html(browser.page_source, "All_cat_")
    soup = BeautifulSoup(browser.page_source, 'lxml')
    main_list = soup.find('ul', class_='nav-verticalmenu').find_all('li')
    k=0
    for main_item in main_list:
        href = BASE_URL + main_item.find('a').get('href')
        browser.get(href)
        if k >= 1:
            break
        soup = BeautifulSoup(browser.page_source, 'lxml')
        list = soup.find('ul', class_='listSidebar').find_all('li')
        for item in list:
            if item.find('span', class_='pull-right') == None:
                continue
            else:
                url = BASE_URL + item.find('a').get('href')
                if url not in urls:
                    urls.append(url)
        k+=1

    j=0
    while j < len(urls):
        print('Scraping', urls[j])
        browser.get(urls[j])

        soup = BeautifulSoup(browser.page_source, 'lxml')
        category_titles = soup.find('ol', class_='breadcrumb').find_all('li')
        category = category_titles[1].find('span').text.strip()

        i=0
        pagination = True
        while pagination:
            if i != 0:
                try:
                    wait2.until(lambda browser: browser.find_element_by_css_selector('#pagination > ul'))
                    elements = browser.find_elements_by_css_selector('#pagination > ul > li')
                    c=0
                    while c < len(elements)-1:
                        class_name = elements[c].get_attribute("class")
                        if "active" in class_name:
                            if len(elements)-2 >= c+1:
                                href_glob = elements[c+1].find_element_by_css_selector('a').get_attribute("href")
                                browser.get(href_glob)
                                c+=1
                                break
                            else:
                                pagination = False
                                c+=1
                                break
                        c+=1
                except NoSuchElementException:
                    pagination = False
                except TimeoutException:
                    pagination = False
                except:
                    pagination = False
                try:
                    soup = BeautifulSoup(browser.page_source, 'lxml')
                    list = soup.find('div', class_='product_list').find_all('div', class_='product_block')
                except:
                    pagination = False
            if i == 0:
                try:
                    soup = BeautifulSoup(browser.page_source, 'lxml')
                    list = soup.find('div', class_='product_list').find_all('div', class_='product_block')
                except:
                    pagination = False
            if pagination == False:
                break
            # print(len(list))
            # print(i+1)
            for item in list:
                if item.find('a', class_='product-name') == None:
                    continue
                else:
                    href = BASE_URL + item.find('a', class_='product-name').get('href')
                    browser2.get(href)

                soup = BeautifulSoup(browser2.page_source, 'lxml')

                if soup.find('h1', itemprop='name') != None:
                    product_name = soup.find('h1', itemprop='name').text.strip()
                else:
                    product_name = None

                # Fields to capture:
                # ---brand (shown as Nhãn hiệu)
                # ---availability (shown as Tình trạng)
                # ---delivery fee (if it exists)
                # ---product name
                # ---price
                # ---old_price (previous price, if it exists)
                # ---category (name of the category)
                # ---current date

                # if item.find('div', class_='english_name') != None:
                #     title_English = item.find('div', class_='english_name').text.strip()
                # else:
                #     title_English = None
                # print("Title: " + title)
                if soup.find('span', class_='price') != None:
                    price = soup.find('span', class_='price').text.strip()
                    # price = price.split('₫')[1]
                    # price = price.strip()
                else:
                    price = None

                if soup.find('span', class_='availability') != None:
                    availability = soup.find('span', class_='availability').text.strip()
                else:
                    availability = None
                # print("Price: " + str(price))
                if soup.find('span', class_='product-price-old') != None:
                    old_price = soup.find('span', class_='product-price-old').text.strip()
                    # old_price = old_price.split('₫')[1]
                    # old_price = old_price.strip()
                else:
                    old_price = None

                brand = None
                # availability = None
                m_list = soup.find('ul', class_='description').find_all('li')[0]
                brand = m_list.find('a').text.strip()

                data = {'category': category,
                        'product_name': product_name,
                        'brand': brand,
                        'availability': availability,
                        'price': price,
                        'old_price': old_price,
                        'date': DATE}
                write_csv(data)
            file_name = str(j+1) + "_" + str(i+1) + "_"
            write_html(browser.page_source, file_name)
            i+=1
        # print(j)
        j+=1
    # Close browser
    browser.close()
    browser.service.process.send_signal(signal.SIGTERM)
    browser.quit()
    # Close browser
    browser2.close()
    browser2.service.process.send_signal(signal.SIGTERM)
    browser2.quit()
    compress_data()
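
The heart of the pagination handling above is the walk along the pager: find the <li> marked "active" and follow the link in the one after it. A stripped-down helper expressing just that step (the function name is mine; the selector is the one used above, and the trailing "next" arrow handling is simplified):

from selenium.common.exceptions import NoSuchElementException

def next_page_href(browser):
    # href of the <li> right after the active one, or None on the last page
    items = browser.find_elements_by_css_selector('#pagination > ul > li')
    for c, li in enumerate(items):
        if "active" in li.get_attribute("class"):
            if c + 1 < len(items):
                try:
                    return items[c + 1].find_element_by_css_selector(
                        'a').get_attribute("href")
                except NoSuchElementException:
                    return None
            return None
    return None

The outer loop would then either follow the returned href with browser.get() or stop once the helper returns None.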
Example #6
 def _wait_for_element(self, args):
     wait = ui.WebDriverWait(self.browser, self.timeout)
     wait.until(
         expected_conditions.element_to_be_clickable(
             (args["find_by"], args["find_text"])))
Example #7
# encoding:utf-8
import requests
import re
import selenium.webdriver.support.ui as ui

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

browser = webdriver.Chrome()  # choose which browser to open
wait = ui.WebDriverWait(browser, 10)  # maximum time the browser may spend loading

# search for content
def search():
    try:
        browser.get('http://gou.jd.com/')  # open the URL
        search_input = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#inputkey'))  # find the input box: presence_of_element_located with a CSS_SELECTOR locator
        )
        submit = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR,
                                        '#search_2015 > div.search_box_2015.clearfix > a')))  # the search button: element_to_be_clickable

        search_input.send_keys('魅族手机')  # type the search keyword (a phone brand)
        submit.click()  # click search
        total = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#page > span > b')))  # the total number of result pages
        return total.text
    except TimeoutException:  # on timeout, retry the search
        return search()
Example #8
def find_element_by_suffix(driver, key, val, buffer=None):
    try:
        function = key.split('_')[0]
        option = key.split('_')[-1]
        isSyn = lambda option: option == 'syn'

        if function == 'url':
            driver.get(val)
            #old_page = driver.find_element_by_tag_name('html')
            #ui.WebDriverWait(driver, time_out).until(EC.staleness_of(old_page))
        elif function == 'click':
            if isSyn(option):
                element = find_element_by_syn(driver, val.split(','))
            else:
                locator = find_locator_by_option(option, val)
                ui.WebDriverWait(driver, time_out).until(
                    EC.visibility_of_element_located(locator))
                element = EC.element_to_be_clickable(locator)(driver)
            element.click()
        elif function == 'sendkeys':
            if isSyn(option):
                element = find_element_by_syn(driver, val.split(',')[1:])
            else:
                locator = find_locator_by_option(option, val.split(',')[-1])
                ui.WebDriverWait(driver, time_out).until(
                    EC.visibility_of_element_located(locator))
                element = EC.visibility_of_element_located(locator)(driver)
            element.send_keys(val.split(',')[0])
        elif function == 'none':
            if isSyn(option):
                find_element_by_syn(driver, val.split(','))
            else:
                locator = find_locator_by_option(option, val)
                ui.WebDriverWait(driver, time_out).until(
                    EC.visibility_of_element_located(locator))
                EC.visibility_of_element_located(locator)(driver)
        elif function == 'get':
            if isSyn(option):
                element = find_element_by_syn(driver, val.split(','))
            else:
                locator = find_locator_by_option(option, val)
                ui.WebDriverWait(driver, time_out).until(
                    EC.visibility_of_element_located(locator))
                element = EC.visibility_of_element_located(locator)(driver)
            buffer[key] = element.text
        elif function == 'getAttribute':
            if isSyn(option):
                element = find_element_by_syn(driver, val.split(','))
            else:
                locator = find_locator_by_option(option, val.split(',')[-1])
                ui.WebDriverWait(driver, time_out).until(
                    EC.visibility_of_element_located(locator))
                element = EC.visibility_of_element_located(locator)(driver)
            buffer[key] = element.get_attribute(val.split(',')[0])
        elif function == 'getByfilter':
            if isSyn(option):
                element = find_element_by_syn(driver, val.split(','))
            else:
                locator = find_locator_by_option(option, val.split(',')[-1])
                ui.WebDriverWait(driver, time_out).until(
                    EC.visibility_of_element_located(locator))
                element = EC.visibility_of_element_located(locator)(driver)
            myfilter = eval(val.split(',')[0])
            buffer[key] = myfilter(element.text)
        elif function == 'verify':
            # resolve the expected text first so it exists on both branches
            if val.split(',')[0][0] == '$':
                verify_context = buffer[val.split(',')[0][1:]]
            else:
                verify_context = val.split(',')[0]
            if isSyn(option):
                element = find_element_by_syn(driver, val.split(','))
            else:
                locator = find_locator_by_option(option, val.split(',')[-1])
                ui.WebDriverWait(driver, time_out).until(
                    EC.visibility_of_element_located(locator))
                element = EC.visibility_of_element_located(locator)(driver)
            if verify_context not in element.text:
                raise VarFail("Verify failed:" + verify_context)
        elif function == 'touch':
            touch = webdriver.TouchActions(driver)  # created up front so both branches can use it
            if isSyn(option):
                element = find_element_by_syn(driver, val.split(','))
            else:
                locator = find_locator_by_option(option, val)
                ui.WebDriverWait(driver, time_out).until(
                    EC.visibility_of_element_located(locator))
                element = EC.visibility_of_element_located(locator)(driver)
            touch.tap(element).perform()
        elif function == 'script':
            js = val.split(',')[0]
            if len(val.split(',')) == 1:
                driver.execute_script(js)
            else:
                parms = map(lambda pa: buffer[pa[1:]] if pa[0] == '$' else pa,
                            val.split(',')[1:])
                driver.execute_script(js, parms)
        elif function == 'post':
            account, password, method = val.split(',')[:3]
            parms = map(lambda pa: buffer[pa[1:]] if pa[0] == '$' else pa,
                        val.split(',')[3:])
            eval("WebsiteHelper()()." + method)(*parms)
        elif function == 'alert':
            alert = driver.switch_to.alert
            eval("alert." + val + "()")
    except Exception as ex:
        raise VarAbort(str(ex))
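
The dispatcher expects keys shaped like "<function>_<option>" with comma-separated values, and leans on project-specific helpers (find_locator_by_option, find_element_by_syn, time_out) not shown here. A cut-down, self-contained version of the same dispatch idea, with all names being my own:

import selenium.webdriver.support.ui as ui
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

TIME_OUT = 10
BY_OPTION = {'id': By.ID, 'css': By.CSS_SELECTOR, 'xpath': By.XPATH}

def run_step(driver, key, val, buffer=None):
    # key looks like "click_css" or "get_id": action prefix, locator-type suffix
    action, option = key.split('_')[0], key.split('_')[-1]
    if action == 'url':
        driver.get(val)
        return
    locator = (BY_OPTION[option], val.split(',')[-1])
    element = ui.WebDriverWait(driver, TIME_OUT).until(
        EC.visibility_of_element_located(locator))
    if action == 'click':
        element.click()
    elif action == 'sendkeys':
        element.send_keys(val.split(',')[0])  # the text to type comes first in val
    elif action == 'get':
        buffer[key] = element.text

driver = webdriver.Chrome()
buffer = {}
run_step(driver, 'url_1', 'https://example.com/')
run_step(driver, 'get_css', 'h1', buffer)
print(buffer['get_css'])
driver.quit()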
Example #9
def contracts(date):
    print("NSE CONTRACTS")
    driver = webdriver.Chrome(r'E:\chromedriver.exe')

    #driver.get("https://www.zaubacorp.com/")
    driver.get("https://www.nseindia.com/products/content/equities/equities/oi_spurts.htm")


    time.sleep(4)

    wait = ui.WebDriverWait(driver, 10)
    wait.until(page_is_loaded)

    ## END OF UNDERLYING

    button=driver.find_element_by_xpath('//*[@id="tab8"]')

    button.click()
    time.sleep(5)
    df1=pd.DataFrame()

    columns1=["Instrument","Symbol","Expiry","Strike Price","Type","LTP","Prev.Close","%Change in LTP",date+" OI","Jan24,2018 OI","OI Change","Volume in contracts","TurnOver in crores","Premium Turnover in crores","Underlyning Value","Type of OI Spurts","Current Business Date","Previous Business Date"]
    #s=['Rise in OI-Rise in Price','Rise in OI-Slide in Price','Slide in OI-Rise in Price','Slide in OI-Slide in Price']

    k=0

    # XPaths of the three remaining tab buttons
    ul = ['//*[@id="replacetext"]/div/ul/li[2]',
          '//*[@id="replacetext"]/div/ul/li[3]',
          '//*[@id="replacetext"]/div/ul/li[4]']

    time.sleep(5)
    row_count=len(driver.find_elements_by_xpath('//*[@id="replacetext"]/table/tbody/tr'))
    col_count=len(driver.find_elements_by_xpath('//*[@id="replacetext"]/table/tbody/tr[2]/td'))
    print (row_count)
    print (col_count)

    i=2
    d1={}
    first_str='//*[@id="replacetext"]/table/tbody/tr['
    second_str=']/td['
    third_str=']'


    print ("FIRST ")
    while i<=row_count:
        j=1
        while j<=col_count:
            final_str=first_str+str(i)+second_str+str(j)+third_str
            d1[columns1[j-1]]=driver.find_element_by_xpath(final_str).text
            j=j+1
        d1[columns1[j-1]]="Rise in OI-Rise in Price"
        j=j+1
        d1[columns1[j-1]]=date
        j=j+1
        d1[columns1[j-1]]='Jan24,2018'
        i=i+1
        df1=df1.append(d1,ignore_index=True)

    button=driver.find_element_by_xpath('//*[@id="riseinOIslideinPrice"]')
    button.click()

    time.sleep(5)
    print ("SECOND ")

    row_count=len(driver.find_elements_by_xpath('//*[@id="replacetext"]/table/tbody/tr'))
    col_count=len(driver.find_elements_by_xpath('//*[@id="replacetext"]/table/tbody/tr[2]/td'))

    i=2
    while i<=row_count:
        j=1
        while j<=col_count:
            final_str=first_str+str(i)+second_str+str(j)+third_str
            d1[columns1[j-1]]=driver.find_element_by_xpath(final_str).text
            j=j+1
        d1[columns1[j-1]]="Rise in OI-Slide in Price"
        j=j+1
        d1[columns1[j-1]]=date
        j=j+1
        d1[columns1[j-1]]='Jan24,2018'
        i=i+1
        df1=df1.append(d1,ignore_index=True)

    button=driver.find_element_by_xpath('//*[@id="slideinOIriseinPrice"]')
    button.click()
    time.sleep(5)
    print ("third")

    row_count=len(driver.find_elements_by_xpath('//*[@id="replacetext"]/table/tbody/tr'))
    col_count=len(driver.find_elements_by_xpath('//*[@id="replacetext"]/table/tbody/tr[2]/td'))

    i=2
    while i<=row_count:
        j=1
        while j<=col_count:
            final_str=first_str+str(i)+second_str+str(j)+third_str
            d1[columns1[j-1]]=driver.find_element_by_xpath(final_str).text
            j=j+1
        d1[columns1[j-1]]="Slide in OI-Rise in Price"
        j=j+1
        d1[columns1[j-1]]=date
        j=j+1
        d1[columns1[j-1]]='Jan24,2018'
        i=i+1
        df1=df1.append(d1,ignore_index=True)

    button=driver.find_element_by_xpath('//*[@id="slideinOIslideinPrice"]')
    button.click()
    time.sleep(5)
    print ("fourth")

    row_count=len(driver.find_elements_by_xpath('//*[@id="replacetext"]/table/tbody/tr'))
    col_count=len(driver.find_elements_by_xpath('//*[@id="replacetext"]/table/tbody/tr[2]/td'))

    i=2
    while i<=row_count:
        j=1
        while j<=col_count:
            final_str=first_str+str(i)+second_str+str(j)+third_str
            d1[columns1[j-1]]=driver.find_element_by_xpath(final_str).text
            j=j+1
        d1[columns1[j-1]]="Slide in OI-Slide in Price"
        j=j+1
        d1[columns1[j-1]]=date
        j=j+1
        d1[columns1[j-1]]='Jan24,2018'
        i=i+1
        df1=df1.append(d1,ignore_index=True)

    path = os.path.realpath(date + ".xlsx")
    book = load_workbook(path)
    writer = pd.ExcelWriter(path, engine='openpyxl')
    writer.book = book
    df1.to_excel(writer,sheet_name="NSE CONTRACTS",columns=["Instrument","Symbol","Expiry","Strike Price","Type","LTP","Prev.Close","%Change in LTP",date+" OI","Jan24,2018 OI","OI Change","Volume in contracts","TurnOver in crores","Premium Turnover in crores","Underlyning Value","Type of OI Spurts","Current Business Date","Previous Business Date"])
    writer.save()
    writer.close()
    driver.close()
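
The four scraping blocks above are identical except for the button clicked and the "Type of OI Spurts" label written into each row. A helper along these lines (the name read_table is mine) would express the shared table read once:

def read_table(driver, columns, label, date):
    # read every row of the currently displayed table into dicts keyed by `columns`
    rows = len(driver.find_elements_by_xpath('//*[@id="replacetext"]/table/tbody/tr'))
    cols = len(driver.find_elements_by_xpath('//*[@id="replacetext"]/table/tbody/tr[2]/td'))
    records = []
    for i in range(2, rows + 1):
        rec = {}
        for j in range(1, cols + 1):
            cell = '//*[@id="replacetext"]/table/tbody/tr[%d]/td[%d]' % (i, j)
            rec[columns[j - 1]] = driver.find_element_by_xpath(cell).text
        rec[columns[cols]] = label             # "Type of OI Spurts"
        rec[columns[cols + 1]] = date          # "Current Business Date"
        rec[columns[cols + 2]] = 'Jan24,2018'  # "Previous Business Date"
        records.append(rec)
    return records

Each section would then reduce to clicking its tab button and appending read_table(driver, columns1, label, date) to the frame.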
Example #10
 def is_visible(self, locator_type, locator, timeout=2):
     try:
         ui.WebDriverWait(self.driver, timeout).until(EC.visibility_of_element_located((locator_type, locator)))
         return True
     except TimeoutException:
         return False
Example #11
 def is_clickable(self, locator_type, locator, timeout=2):
     try:
         ui.WebDriverWait(self.driver, timeout).until(EC.element_to_be_clickable((locator_type, locator)))
         return True
     except TimeoutException:
         return False
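
Both helpers convert the TimeoutException that until() raises into a boolean, which reads nicely in assertions. The same pattern as a free function, in a self-contained sketch (the driver setup, URL, and locators are my assumptions):

import selenium.webdriver.support.ui as ui
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

def is_visible(driver, locator_type, locator, timeout=2):
    # True if the element becomes visible within `timeout` seconds
    try:
        ui.WebDriverWait(driver, timeout).until(
            EC.visibility_of_element_located((locator_type, locator)))
        return True
    except TimeoutException:
        return False

driver = webdriver.Chrome()
driver.get('https://example.com/')
assert is_visible(driver, By.TAG_NAME, 'h1')
assert not is_visible(driver, By.ID, 'no-such-id')
driver.quit()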
Example #12
import os
import os.path
import re
import time
import unicodedata

from pyvirtualdisplay import Display  # assumed source of Display used below
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import selenium.webdriver.support.ui as ui

disp = False
usuario = 'julioazt'
senha = '9517539'

if disp:
    display = Display(visible=0, size=(800, 600))
    aux = display.start()
chromedriver = "chromedriver"
os.environ["webdriver.chrome.driver"] = chromedriver
driver = webdriver.Chrome(chromedriver)
wait = ui.WebDriverWait(driver, 20)
url_login = '******'
driver.get(url_login)
wait.until(lambda driver: len(driver.find_elements_by_id("_CTL")) > 0)
time.sleep(1)
driver.find_element_by_id('_USR_LOGIN').send_keys(usuario)
driver.find_element_by_id('_CTL').click()
wait.until(lambda driver: len(
    driver.find_element_by_id("span__USR_NOME").get_attribute('innerHTML')) > 0
           )
driver.find_element_by_id('_CTL').send_keys(senha + Keys.ENTER)
time.sleep(5)


def espera():
    wait.until(lambda driver: ('display: none' in driver.find_element_by_id(
Example #14
    def _process_number(self, number, number_type, alias):
        def _normalize_date_str(date_str):
            date_str = re.sub(r'(\d{2})\-(\d{2})\-(\d{4})',
                              r'\g<2>/\g<1>/\g<3>', date_str)
            return date_str

        def get_main_data():
            table = driver.find_element_by_id("bibview")

            data = {"Alias": alias}
            for tr in table.find_elements_by_tag_name("tr"):
                tds = tr.find_elements_by_tag_name("td")
                even = False
                key = None
                for td in tds:
                    if not even:
                        key = td.text.strip()
                    else:
                        if key and td.text.strip() not in ["", "-"]:
                            data[key] = _normalize_date_str(
                                td.text.strip().replace(" all Inventors", ""))
                    even = not even

                table = driver.find_element_by_id("bibviewTitle")
                td = table.find_elements_by_tag_name("td")
                data["Title"] = td[1].text.strip()

            try:
                print("ptaptetab")
                self._switch_page("javascript:submitTab('ptaptetab')")
                table = webDriverUi.WebDriverWait(driver, 20).until(
                    (EC.presence_of_element_located([
                        By.XPATH, "//table[@id='ptaptesummarytable' and "
                        "@cellpadding='3']"
                    ])))

                key = None
                for td in table.find_elements_by_tag_name("td"):
                    if not key:
                        key = td.text.strip()
                    else:
                        data[key] = _normalize_date_str(td.text.strip())
                        key = None

            except SiteUsLinkNotFound:
                print("No tab Patent term Adjustments")

            try:
                print("Correspondencetab")
                self._switch_page("javascript:submitTab('Correspondencetab')")
                table = webDriverUi.WebDriverWait(driver, 20).until(
                    (EC.presence_of_element_located(
                        [By.XPATH, "//table[@id='correspondence']"])))

                key = None
                for td in table.find_elements_by_tag_name("td")[1:]:
                    if not key:
                        key = td.text.strip()
                    else:
                        data[key] = _normalize_date_str(td.text.strip())
                        key = None

                data["Agent"] = "Name: " + data["Name:"] + "\n\n" + \
                    "Address:\n" + data["Address:"]

            except SiteUsLinkNotFound:
                print("No tab Address & Attorney/Agent")

            try:
                print("continuitytab")
                self._switch_page("javascript:submitTab('continuitytab')")
                table = webDriverUi.WebDriverWait(driver, 10).until(
                    (EC.presence_of_element_located(
                        [By.XPATH, "//table[@id='continuityparent']"])))

                try:
                    con_data = []
                    keys = []
                    for td in table.find_elements_by_tag_name("th"):
                        keys.append(td.text.strip())

                    for tr in table.find_elements_by_id("parentdata0"):
                        key = None
                        value = ""

                        one = {
                            "Alias": alias,
                        }
                        con_data.append(one)
                        i = 0
                        for td in tr.find_elements_by_tag_name("td"):
                            one[keys[i]] = _normalize_date_str(td.text.strip())
                            i += 1

                    self._saver.save_parent_continuity(con_data)
                except:
                    print("No parent data.")

                try:
                    str_ = ""
                    for tr in driver.find_elements_by_id("childdata0"):
                        str_ += "\n" + _normalize_date_str(tr.text.strip())

                    data["Child Continuity Data"] = str_.strip()
                except:
                    print("No child data.")

            except SiteUsLinkNotFound:
                print("No tab Continuity Data")

            try:
                print("foreignPrioritiestab")
                self._switch_page(
                    "javascript:submitTab('foreignPrioritiestab')")

                td = webDriverUi.WebDriverWait(driver, 20).until(
                    (EC.presence_of_element_located(
                        [By.XPATH, "//td[@id='forpriority']"])))
                table = td.find_element_by_tag_name("table")
                key = "Country |Priority |Priority Date ;"
                value = ""
                for tr in table.find_elements_by_xpath(
                        "//tr[@class='wpsTableNrmRow' or @class='wpsTableShdRow']"
                ):

                    next_val = ""
                    for td in tr.find_elements_by_tag_name("td"):
                        next_val += " |" + td.text.strip()

                    next_val = next_val[2:]

                    value += "\n" + next_val

                data[key] = _normalize_date_str(value.strip())

            except SiteUsLinkNotFound:
                print("No tab Foreign Priority")

            self._saver.save_main_data(data)
            return data

        def get_event_data():
            table = driver.find_element_by_id("bibcontents")

            data = []
            for tr in table.find_elements_by_xpath(
                    "//tr[@class='wpsTableNrmRow' or @class='wpsTableShdRow']"
            ):
                tds = tr.find_elements_by_tag_name("td")
                data.append({
                    "Alias": alias,
                    "Input": input_field,
                    "Date": _normalize_date_str(tds[0].text.strip()),
                    "Action": tds[1].text.strip(),
                })

            self._saver.save_evt_history_data(data)
            return data

        def get_documents_data():

            table = driver.find_element_by_id("ifwinnertable")

            data = []
            for tr in table.find_elements_by_xpath(
                    "//tr"
                    "[@class='wpsTableNrmRow' or @class='wpsTableShdRow']"):
                tds = tr.find_elements_by_tag_name("td")

                data.append({
                    "Alias": alias,
                    "Input": input_field,
                    "Date": _normalize_date_str(tds[0].text.strip()),
                    "Document type": tds[2].text.strip(),
                    "Category": tds[3].text.strip(),
                    "Number of pages": tds[4].text.strip(),
                })

            dosnum = re.search(
                r"document\.downloadForm\.dosnum\.value='(\d+)';",
                driver.page_source)

            dosnum = dosnum.group(1)

            sels = "0" * len(data)

            try:
                i = 0
                for d in data:

                    sel = sels[:i] + "1" + sels[i + 1:]
                    url = "http://portal.uspto.gov/pair/download/ShowPdfBook?" \
                        "dosnum=%s&sels=%s" % (dosnum, sel)

                    d["Link"] = url
                    i += 1
            except Exception as e:
                print(e)
                raise

            self._saver.save_documents_data(data)

            print("")
            if not self._do_download:
                print("Download is turned off.")
                return

            print("Downloading files.")

            self._fetcher.clear_cookies()
            s = self._fetcher.get_session()

            for cook in driver.get_cookies():
                s.cookies[cook["name"]] = cook["value"]

            files_dir = os.path.dirname(os.path.realpath(__file__))
            files_dir = os.path.join(files_dir, "../Output/%s" % alias)

            try:
                os.stat(files_dir)
            except:
                os.mkdir(files_dir)

            for i in range(len(sels)):
                sel = sels[:i] + "1" + sels[i + 1:]

                url = "http://portal.uspto.gov/pair/download/ShowPdfBook?" \
                    "dosnum=%s&sels=%s" % (dosnum, sel)

                filename = "%d - %s.pdf" % \
                            (i, fs.clean_filename(data[i]["Document type"]))

                print("Downloading file: %s" % filename)

                self._fetcher.download_file(url,
                                            os.path.join(files_dir, filename))

            return data

        print("Processing number: %s (%s)" % (number, number_type))

        input_field = "%s (%s)" % (number, number_type
                                   )  # setting up input field

        print("")
        print("Entering number...")

        self._switch_page("javascript:submitTab('pair_search')")
        driver = self._driver

        # waiting for JavaScript to finish
        webDriverUi.WebDriverWait(driver, 20) \
                .until(EC.presence_of_element_located([By.ID, "SubmitPAIR"]))

        if number_type == "USA":
            driver.find_element_by_xpath(
                "//input[@title='application number']").click()
        elif number_type == "USPUB":
            driver.find_element_by_xpath(
                "//input[@title='publication number']").click()
        elif number_type == "USPAT":
            driver.find_element_by_xpath(
                "//input[@title='patent number']").click()
        else:
            raise Exception("Unknown number type: %s" % number_type)

        driver.find_element_by_id("number_id").send_keys(number)
        driver.find_element_by_id("SubmitPAIR").click()

        print("WAITING...")
        element = webDriverUi.WebDriverWait(driver, 20).until(
            self._wait([
                "//img[@alt='Application Data']", "//div[@id='ERRORDIV']",
                "//div[@id='ERRORDIVPALMPROBLEM']",
                "//table[@class='epoTableBorder']//font[@color='red']"
            ]))

        if element.get_attribute('id') == 'ERRORDIVPALMPROBLEM':
            print("Overloaded, trying again in 5 seconds...")
            time.sleep(5)
            return self._process_number(number, number_type, alias)

        if element.text:
            if "Service not available at this time" in element.text:
                print("Service not available, trying again in 5 seconds...")
                time.sleep(5)
                return self._process_number(number, number_type, alias)

            raise SiteUsNoNumberException("Error: %s" % element.text.strip())

        self._switch_page("javascript:submitTab('detailstab')")
        try:
            element = webDriverUi.WebDriverWait(driver, 20).until(
                self._wait(["//img[@src='/pair/img/tabs/image1on.gif']"]))
        except:
            print("Wrong tab opened?...")
            return self._process_number(number, number_type, alias)

        # main data
        print("")
        print("Getting main data...")
        get_main_data()

        # history data
        print("")
        print("Getting history data...")

        try:
            self._switch_page("javascript:submitTab('fileHistorytab')")
            webDriverUi.WebDriverWait(driver, 20).until(
                (EC.presence_of_element_located([By.ID, "bibcontents"])))

            get_event_data()
        except SiteUsException:
            print("No history data")

        print("")
        print("Getting document data...")

        try:
            self._switch_page("javascript:submitTab('ifwtab')")
            webDriverUi.WebDriverWait(driver, 20).until(
                (EC.presence_of_element_located([By.ID, "ifwinnertable"])))
            get_documents_data()

        except SiteUsException:
            print("No document data")
Example #15
 def wait_element_is_clickable(self, method, element, sec=10):
     return ui.WebDriverWait(self.driver, sec).until(
         EC.element_to_be_clickable((method, element)))
Example #16
def is_not_visible(locator, timeout=20):
    try:
        ui.WebDriverWait(WebOp.shared_wd, timeout).until_not(EC.visibility_of_element_located((By.XPATH, locator)))
        return True
    except TimeoutException:
        return False
Example #17
 def wait_for_sidebar_is_loaded(self, sec=10):
     ui.WebDriverWait(self.driver, sec).until(
         EC.presence_of_element_located(
             (by.By.CSS_SELECTOR, "div#sidebar li.active")))
     time.sleep(0.5)
Example #18
def downloadWebPMUpage():
	try:
		connexion = sqlite3.connect('../03 - BDD/BasePMU.db')
	except sqlite3.Error as er:
		print('an error occurred while connecting to the database: ' + str(er))
		exit(1)

	cursor = connexion.cursor()



	url='https://info.pmu.fr'

	caps = DesiredCapabilities.FIREFOX.copy()

	caps['marionette'] = True
	br = webdriver.Firefox(capabilities=caps, executable_path='./Package/geckodriver.exe')
	br.get(url)

	soup=''
	wait = ui.WebDriverWait(br,5)

	br.find_element_by_class_name('cnil-close').click()

	wait = ui.WebDriverWait(br,5)

	# #------------ manual month-by-month variant (this same block was repeated
	# #------------ once per month; the loop below replaces it)
	# time.sleep(2)
	# br.find_element_by_xpath("//div[@class='date']").click()
	# br.find_element_by_xpath("//a[@class='ui-datepicker-prev ui-corner-all']").click()
	# br.find_element_by_xpath(u'//a[text()="1"]').click()

	for v in range(1,8):
		time.sleep(2)
		wait = ui.WebDriverWait(br,5)
		br.find_element_by_xpath("//div[@class='date']").click()
		br.find_element_by_xpath(u'//a[text()="'+str(v)+'"]').click()

		wait = ui.WebDriverWait(br,5)
		time.sleep(10)	
		link = br.find_element_by_xpath("(//button[contains(text(),'Programme en détails')])")
		link.click()
		wait = ui.WebDriverWait(br,5)

		soup=BeautifulSoup(br.page_source,'lxml')

		
		writeFile(soup.prettify(),'../02 - Page Web/listeProgramme.html')

		pagePMU = codecs.open('../02 - Page Web/listeProgramme.html', 'r','utf-8')
		listCourse=listeCoursePMU(pagePMU)
		print(listCourse)

		for j in range(0,len(listCourse)):
			query = ('UPDATE COURSE SET HIPPODROME="%s" WHERE URL = "%s" ' % 
			(	listCourse[j][0],listCourse[j][3]) )

			print(query)
			cursor.execute(query)
			connexion.commit()

	

	br.close()
Example #19
 def _do_login_step_2(self, args):
     wait = ui.WebDriverWait(self.browser, self.timeout)
     wait.until(
         expected_conditions.element_to_be_clickable((By.ID, "nav_logout")))
Example #20
url = "https://www.instagram.com/p/CH-MgQOn-7E/" # Chagent this with any other competition " "
#We go to the url
time.sleep(timer)
driver.get(url) 
time.sleep(timer)
#Technical things
counter = 0
friendList = ['@','@','@'] # put your friends' usernames here, comma-separated, like so: '@','@','@', ..


while(True):
    try:
        # Create the comment: three random friends, space-separated
        send = " ".join(random.choice(friendList) for _ in range(3))
        # Try to post it
        comment_box = ui.WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "textarea.Ypffh")))
        comment_box.send_keys(send)
        comment_box.send_keys(Keys.ENTER)
        time.sleep(timer)
        driver.refresh() # refresh as an alternative to waiting

        # Increase the counter, and back off once in a while
        counter+=1
        if(counter>=100): # after 100 comments, sleep for 20 minutes
            time.sleep(60*20)
    except Exception as e:
        #If exception continue
        print(e)
        continue
    
    
Example #21
def getURL_Title():
    global save_name
    SUMRESOURCES = 0
    url = 'https://movie.douban.com'
    driver_item = webdriver.Firefox()
    wait = ui.WebDriverWait(driver_item, 15)
Example #22
 def get_element_id(self, el_name, sec=10):
     el = ui.WebDriverWait(self.driver, sec).until(
         EC.presence_of_element_located(
             (by.By.XPATH, consts.AppPackages.format(el_name))))
     path = el.get_attribute("id")
     return path.split('__')[-1]
Example #23
def is_not_visible(driver, locator, method, timeout=10):
    try:
        ui.WebDriverWait(driver, timeout).until_not(EC.visibility_of_element_located((method, locator)))
        return True
    except TimeoutException:
        return False
Example #24
 def check_element_on_page(self, method, value, sec=10):
     try:
         ui.WebDriverWait(self.driver, sec).until(
             EC.presence_of_element_located((method, value)))
     except exc.TimeoutException:
         self.fail("Element {0} is not preset on the page".format(value))
Example #25
from selenium import webdriver
import time
import selenium.webdriver.support.ui as ui
driver = webdriver.Firefox()
url = "http://mail.163.com/"
wait = ui.WebDriverWait(driver, 10)
driver.get(url)
time.sleep(5)

# switch into the login iframe
frame = driver.find_element_by_id('mainBg').find_element_by_class_name(
    'loginWrap').find_element_by_id('loginDiv').find_element_by_css_selector(
        'iframe')
driver.switch_to.frame(frame)

time.sleep(5)

# log in to the mailbox
your_mail = ''
your_pwd = ''

driver.find_element_by_name("email").send_keys(your_mail)
driver.find_element_by_name("password").send_keys(your_pwd)
time.sleep(3)

driver.find_element_by_id("dologin").click()
time.sleep(6)
driver.quit()
print("login in")
Example #26
 def wait_for_alert_message(self, sec=5):
     locator = (by.By.CSS_SELECTOR, 'div.alert-success')
     logger.debug("Waiting for a success message")
     ui.WebDriverWait(self.driver, sec).until(
         EC.presence_of_element_located(locator))
Example #27
def wait_element_visible(locator, timeOut=5):
    try:
        return ui.WebDriverWait(browser, timeOut).until(
            EC.visibility_of_element_located(locator))
    except Exception:
        return False
Example #28
 def wait_for_error_message(self, sec=20):
     locator = (by.By.CSS_SELECTOR, 'div.alert-danger > p')
     logger.debug("Waiting for an error message")
     ui.WebDriverWait(self.driver, sec, 1).until(
         EC.presence_of_element_located(locator))
     return self.driver.find_element(*locator).text
Example #29
def daily_task():
    global DATE
    DATE = str(datetime.date.today())
    chromeOptions = webdriver.ChromeOptions()
    prefs = {"profile.managed_default_content_settings.images":2}
    # chromeOptions.add_argument("--disable-javascript")
    chromeOptions.add_argument("--headless")
    chromeOptions.add_experimental_option("prefs",prefs)
    browser = webdriver.Chrome(options=chromeOptions,executable_path=CHROME_DRIVER_PATH)
    # browser = webdriver.Chrome()
    browser.set_window_position(400, 40)
    browser.set_window_size(1300, 1024)
    wait = ui.WebDriverWait(browser,60)
    urls = []
    browser.get('https://www.careerlink.vn/en')
    soup = BeautifulSoup(browser.page_source, 'lxml')
    category_list = soup.find('div', id='search-by-category').find_all('a')
    for item in category_list:
        url = BASE_URL + item.get('href')
        if url not in urls:
            urls.append(url)
    write_html(browser.page_source, "All_cat_")
    j=0
    while j < len(urls):
        browser.get(urls[j])
        wait.until(lambda browser: browser.find_element_by_xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div[1]/div[1]/p'))
        soup = BeautifulSoup(browser.page_source, 'lxml')

        category = soup.find('p', class_='lead-sm').find('strong').text.strip()
        category = category.replace('"', '')


        i = 0
        pagination = True
        while pagination:
            soup = BeautifulSoup(browser.page_source, 'lxml')
            if i != 0:
                if i == 1:
                    browser.get(urls[j])
                    file_name = str(j+1) + "_" + str(i) + "_"
                    write_html(browser.page_source, file_name)
                else:
                    browser.get(href_glob)
                    file_name = str(j+1) + "_" + str(i) + "_"
                    write_html(browser.page_source, file_name)
                wait.until(lambda browser: browser.find_element_by_xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div[3]/nav/ul'))
                elements = browser.find_elements_by_css_selector('ul.pagination > li')
                if len(elements) == 1:
                    pagination = False
                    break
                c = 0
                while c < len(elements):
                    class_name = elements[c].get_attribute("class")
                    if "active" in class_name:
                        # Follow the link right after the active page, if there is one
                        if len(elements) - 1 >= c + 1:
                            href_glob = elements[c + 1].find_element_by_css_selector('a').get_attribute("href")
                            browser.get(href_glob)
                            c += 1
                            break
                        else:
                            pagination = False
                            c += 1
                            break
                    c += 1
                wait.until(lambda browser: browser.find_element_by_xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div[3]/nav/ul'))
                soup = BeautifulSoup(browser.page_source, 'lxml')
                items = soup.find('div', class_='list-group').find_all('div', class_='list-group-item')  # avoid shadowing built-in list
            if i == 0:
                soup = BeautifulSoup(browser.page_source, 'lxml')
                items = soup.find('div', class_='list-group').find_all('div', class_='list-group-item')
            if not pagination:
                break
            # print(len(items))
            # print(i+1)
            for item in items:
                # if item.find('div', class_='ct_title') != None:
                #     title = item.find('div', class_='ct_title').text.strip()
                # else:
                #     title = None
                href = BASE_URL + item.find('a').get('href')
                browser.get(href)
                wait.until(lambda browser: browser.find_element_by_xpath('/html/body/div[1]/div[2]/div[2]/div[1]'))
                soup = BeautifulSoup(browser.page_source, 'lxml')

                salary_tag = soup.find('span', itemprop='baseSalary')
                Salary = salary_tag.text.strip() if salary_tag else None

                location_tag = soup.find('span', itemprop='address')
                Work_location = location_tag.text.strip() if location_tag else None

                skills_tag = soup.find('div', itemprop='skills')
                Job_Requirement = skills_tag.text.strip() if skills_tag else None

                description_tag = soup.find('div', itemprop='description')
                Job_Description = description_tag.text.strip() if description_tag else None

                # Fields scraped from the detail page (site labels in quotes):
                #   Salary; Work location ("Work Location"); Job level ("Career Level");
                #   Industry ("Job Category"); Job type ("Position Type"); Age ("Age");
                #   Gender ("Gender Require"); Experience ("Experience Level");
                #   Education ("Education Level"); Job Description ("Job Description Detail");
                #   Job Requirement ("Required Experience/Skills Detail"); Benefits (if present).

                Job_level = None
                Industry = None
                Job_type = None
                Age = None
                Gender = None
                Experience = None
                Education = None
                try:
                    ul = soup.select('div.job-data > ul.list-unstyled')[1]
                    for li in ul.find_all('li'):
                        txt = li.text.strip()
                        if "Career Level" in txt:
                            Job_level = txt.replace('Career Level:', '').strip()
                        elif "Job Category" in txt:
                            Industry = txt.replace('Job Category:', '').strip()
                        elif "Position Type" in txt:
                            Job_type = txt.replace('Position Type:', '').strip()
                        elif "Age" in txt:
                            Age = txt.replace('Age:', '').strip()
                        elif "Gender Require" in txt:
                            Gender = txt.replace('Gender Require:', '').strip()
                        elif "Experience Level" in txt:
                            Experience = txt.replace('Experience Level:', '').strip()
                        elif "Education Level" in txt:
                            Education = txt.replace('Education Level:', '').strip()
                except:
                    # If the info block is missing, leave every field as None
                    Job_level = Industry = Job_type = Age = Gender = Experience = Education = None


                data = {'category': category,
                        'Salary': Salary,
                        'Work_location': Work_location,
                        'Job_level': Job_level,
                        'Industry': Industry,
                        'Job_type': Job_type,
                        'Age': Age,
                        'Gender': Gender,
                        'Experience': Experience,
                        'Education': Education,
                        'Job_Description': Job_Description,
                        'Job_Requirement': Job_Requirement,
                        'date': DATE}
                write_csv(data)
            # file_name = str(j+1) + "_" + str(i+1) + "_"
            # write_html(browser.page_source, file_name)
            i += 1
        j += 1
    # Close browser
    browser.close()
    browser.service.process.send_signal(signal.SIGTERM)
    browser.quit()
    compress_data()
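The active-page scan in daily_task() can be expressed more directly. A minimal sketch of the same "follow the link after the active page" idea, assuming the same ul.pagination > li markup; next_page_href is a hypothetical helper name:

# Sketch: return the href of the page after the currently active one,
# or None on the last page (markup assumed identical to the snippet above).
def next_page_href(browser):
    elements = browser.find_elements_by_css_selector('ul.pagination > li')
    for idx, li in enumerate(elements):
        if "active" in li.get_attribute("class") and idx + 1 < len(elements):
            return li.find_element_by_css_selector('a').get_attribute("href")
    return None

# Usage inside a crawl loop:
# href = next_page_href(browser)
# if href is None:
#     break
# browser.get(href)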
Beispiel #30
0
def read(path, loadjs=False, session=None, driver=None, timeout=60,
        clear_cookies=True, loadjs_wait_time=3, loadjs_wait_for_callback=None, strict=True):
    """Reads from source and returns contents

    Args:
        path: (str) url or local path to download
        loadjs: (boolean) indicates whether to load js (optional)
        session: (requests.Session) session to use to download (optional)
        driver: (selenium.webdriver) webdriver to use to download (optional)
        timeout: (int) Maximum number of seconds to wait for the request to complete.
        clear_cookies: (boolean) whether to clear cookies.
        loadjs_wait_time: (int) if loading JS, seconds to wait after the
            page has loaded before grabbing the page source
        loadjs_wait_for_callback: (function<selenium.webdriver>) if loading
            JS, a callback that will be invoked to determine when we can
            grab the page source. The callback will be called with the
            webdriver, and should return True when we're ready to grab the
            page source. For example, pass in an argument like:
            ``lambda driver: driver.find_element_by_id('list-container')``
            to wait for the #list-container element to be present before rendering.
        strict: (bool) If False, when download fails, retry but allow parsing even if there
            is still minimal network traffic happening. Useful for sites that regularly poll APIs.
    Returns: str or bytes content from the file or page
    """
    session = session or DOWNLOAD_SESSION

    if clear_cookies:
        session.cookies.clear()

    try:
        if loadjs:                                              # Wait until js loads then return contents
            if USE_PYPPETEER:
                content = asyncio.get_event_loop().run_until_complete(load_page(path))
                return content

            if PHANTOMJS_PATH:
                driver = driver or webdriver.PhantomJS(executable_path=PHANTOMJS_PATH)
            else:
                driver = driver or webdriver.PhantomJS()
            driver.get(path)
            if loadjs_wait_for_callback:
                selenium_ui.WebDriverWait(driver, 60).until(loadjs_wait_for_callback)
            time.sleep(loadjs_wait_time)
            return driver.page_source

        else:                                                   # Read page contents from url
            retry_count = 0
            max_retries = 5
            while True:
                try:
                    response = session.get(path, stream=True, timeout=timeout)
                    break
                except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
                    retry_count += 1
                    print("Error with connection ('{msg}'); about to perform retry {count} of {trymax}."
                        .format(msg=str(e), count=retry_count, trymax=max_retries))
                    time.sleep(retry_count * 1)
                    if retry_count >= max_retries:
                        raise e

            response.raise_for_status()
            return response.content

    except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
        with open(path, 'rb') as fobj:                          # If path is a local file path, try to open the file
            return fobj.read()
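A hedged usage sketch for read(); the URLs are placeholders, and the #list-container id is the hypothetical element from the docstring example:

# Plain HTTP fetch (no JS rendering):
html = read('https://example.com/page.html')

# JS-rendered fetch: wait until the (hypothetical) #list-container element
# exists before grabbing the page source.
rendered = read(
    'https://example.com/app',
    loadjs=True,
    loadjs_wait_for_callback=lambda driver: driver.find_element_by_id('list-container'),
)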