Beispiel #1
0
def getNewRequests(requestURL, Host):
    """Build a ``requests.Session`` that carries a real Chrome browser's identity.

    A Chrome WebDriver is opened on ``requestURL``; the cookies the browser
    ends up with and its ``navigator.userAgent`` string are copied into a new
    session together with a fixed set of browser-like request headers.

    Args:
        requestURL: URL loaded in the browser to obtain cookies.
        Host: Value sent as the ``Host`` request header by the session.

    Returns:
        A ``requests.Session`` with the default headers and cookies applied.

    Raises:
        Whatever the WebDriver raises while loading the page or reading its
        state; the browser is closed before the exception propagates.
    """
    import requests
    import Public.PublicFun as PublicFun

    JobID = PublicFun.createID()
    Chromedriver = PublicFun.getWebDriver("chrome", DataFolderName=JobID)
    try:
        Chromedriver.get(requestURL)
        cookies = Chromedriver.get_cookies()
        userAgent = Chromedriver.execute_script("return navigator.userAgent")
    finally:
        # Always release the browser, even when navigation or the script
        # call fails; otherwise the driver started for this JobID leaks.
        PublicFun.closeWebDriver(JobID, Chromedriver)

    header = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Host': Host,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': userAgent
    }

    # The session is created only after the browser work succeeded, so a
    # failure above leaves no half-configured session behind.
    request = requests.Session()
    request.headers.update(header)

    # Only name/value are copied; the cookies are therefore not restricted
    # to the domain/path the browser stored them under.
    for cookie in cookies:
        request.cookies.set(cookie['name'], cookie['value'])

    return request
Beispiel #2
0
def getPageSize(requestURL):
    """Return the number of pages offered by the paging drop-down on a page.

    The page is loaded in a Chrome WebDriver, and the ``<option>`` entries of
    the paging ``<select>`` are counted. Two CSS selectors are tried in
    order; the second is used only when the first matches nothing.

    Args:
        requestURL: URL of the listing page to inspect.

    Returns:
        The number of matching ``<option>`` elements, or 0 when neither
        selector matches.

    Raises:
        Whatever the WebDriver raises while loading the page; the browser is
        closed before the exception propagates.
    """
    JobID = PublicFun.createID()
    Chromedriver = PublicFun.getWebDriver("chrome", DataFolderName=JobID)
    try:
        Chromedriver.get(requestURL)
        # Fixed pause before reading the DOM — presumably to let the page's
        # scripts render the paging control; TODO confirm it is sufficient.
        time.sleep(2)
        pageSource = Chromedriver.page_source
    finally:
        # Always release the browser, even when loading the page fails;
        # otherwise the driver started for this JobID leaks.
        PublicFun.closeWebDriver(JobID, Chromedriver)

    # Parsing works on the captured HTML string, so the browser is no
    # longer needed at this point.
    Soup = bs(pageSource, "html.parser")
    selectors = (
        "select.b-clear-border.js-paging-select.gtm-paging-top option",
        "select.page-select.js-paging-select.gtm-paging-top option",
    )
    pageSize = 0
    for selector in selectors:
        pageSize = len(Soup.select(selector))
        if pageSize != 0:
            break
    return pageSize
Beispiel #3
0
from selenium.webdriver.common.action_chains import ActionChains

import Public.PublicFun as PublicFun
import Public.SQLConnect as SQLConnect
import Public.BIASDataIO as BIASDataIO

DBConnect=None
JobID=""
try:
    JobID=sys.argv[1]
    DBConnect=SQLConnect.DBConnect(publicSetting=True)
    DBConnect.ConnectDB()
    DBConnect.StartTransaction()
    
    Chromedriver=PublicFun.getWebDriver("chrome",DataFolderName=JobID)
    Chromedriver.get("https://www.104.com.tw/jobs/search/")

    #找到職務類別的按鈕並點擊
    CategoryListButton=Chromedriver.find_element_by_id("job-cat")
    ActionChains(Chromedriver).click(CategoryListButton).perform()
    time.sleep(5)

    e104menu=Chromedriver.find_element_by_id("e104menu2011_main")
    e104menuCount = len(bs(e104menu.get_attribute('innerHTML'), "html.parser").select("ul li"))

    #逐項取得內容
    for ClassACount in range(e104menuCount):
        #移開選取項目
        tempElement=Chromedriver.find_element_by_id("globalbar")
        ActionChains(Chromedriver).move_to_element(tempElement).perform()