예제 #1
0
def func(dname,aname):
    """Open the ly.com one-way flight listing for a route and print scraped fields.

    The URL is built from ``dname``, ``aname`` and the module-level global
    ``startnow``.  The ``from``/``to`` query parameters are fixed
    percent-encoded literals (they decode to 上海 / 北京) and do not vary
    with the arguments.

    For every ``div.dataListContainer`` found in the page source the function
    prints the flight id, start time, end time, departure city, arrival city,
    departure date and price, each as the raw list returned by XPath.

    NOTE(review): this snippet appears to be truncated.  The ``while True``
    loop has no ``break``/``return`` in the visible code, the CSV writer is
    created but never written to, and ``endnow`` is declared but never read.

    Args:
        dname: Departure code, inserted into the URL path via ``str()``.
        aname: Arrival code, inserted into the URL path via ``str()``.
    """
    global startnow # bring in the start (current) date through a global variable
    # The start date is today's date; scraping normally begins from the current day
    urls = 'https://www.ly.com/flights/itinerary/oneway/'+ str(dname) +'-'+ str(aname) + '?date='+startnow+'&fromairport=&toairport=&from=%E4%B8%8A%E6%B5%B7&to=%E5%8C%97%E4%BA%AC&childticket=0,0'
    global endnow # bring in the date 60 days from now through a global variable

    # NOTE(review): the file is opened in append mode but never closed, and
    # csv_writer is not used anywhere in the visible part of the function.
    f = open('同城旅游.csv','a+',encoding='utf-8',newline='')
    csv_writer = csv.writer(f)

    # The name `driver` first holds the Chrome Options object and is rebound
    # to the actual WebDriver instance a few lines below.
    driver=Options()#webdriver.Options()
    driver.add_experimental_option('useAutomationExtension', False)
    driver.add_experimental_option("excludeSwitches", ['enable-automation'])
    prefs = {"profile.managed_default_content_settings.images": 2} # do not load images
    driver.add_experimental_option("prefs",prefs)
    driver=webdriver.Chrome(options=driver)
    driver.get(urls)
    while True:
        # Grab the complete source of the (presumably fully loaded) page
        html = driver.page_source
        response = etree.HTML(html)
        #print(u'全部加载完成')  # disabled debug print ("everything finished loading")

        # Pull every record out of the loaded page with XPath, field by field
        for each in response.xpath("//div[@class='dataListContainer']"):
            # Pauses 8 s per matched container; the HTML tree was already
            # parsed above, so this does not wait for any new content.
            time.sleep(8)

            flight_id = each.xpath(".//p[@class='flight-item-name']/text()")
            starttime = each.xpath(".//div[@class='f-startTime f-times-con']/strong/text()")
            endtime = each.xpath(".//div[@class='f-endTime f-times-con']/strong/text()")
            #endtime = re.sub(r'\s+','', endtime)
            print(flight_id)
            print(starttime)
            print(endtime)
            # NOTE(review): these two expressions end in `text()[n]/text()`.
            # A text node has no child nodes in XPath 1.0, so they look like
            # they always return an empty list; `.../strong/text()[n]` was
            # probably intended — confirm against the live page.
            dcity = response.xpath("//*[@id='__layout']/div/section/div/div[4]/div[2]/div[1]/strong/text()[1]/text()")
            acity = response.xpath("//*[@id='__layout']/div/section/div/div[4]/div[2]/div[1]/strong/text()[2]/text()")
            print(dcity)
            print(acity)
            dates = response.xpath("//*[@id='__layout']/div/section/div/div[4]/div[2]/div[1]/span/text()")# departure date
            print(dates)
            official_price = each.xpath(".//div[@class='head-prices']/strong/em/text()")# price
            print(official_price)
예제 #2
0
def func(aname):
    """Scrape Xiamen Air one-way flights from XMN to ``aname`` and print them.

    Starting at the module-level global ``now`` (a date string placed in the
    query URL), the function loads the booking page in Chrome, prints the
    fields of every flight row, then clicks the "next day" tab and repeats
    until the label of that tab equals the module-level global ``endnow``.

    Fixes over the previous version:
      * The end test compared ``str(<list>)`` with ``endnow``, which can only
        match if ``endnow`` itself is the repr of a list, so the loop
        effectively never ended.  The text of the tab is now compared
        directly; the old comparison is kept as well for backward
        compatibility.
      * ``find_element_by_xpath`` (removed in Selenium 4.3) is replaced by
        ``find_element("xpath", ...)``, which works on Selenium 3 and 4.
      * The browser is always shut down through ``try``/``finally``.
      * The page-level lookups (route, flight date) are done once per page
        and no longer raise ``IndexError`` when the header is missing.

    Args:
        aname: Destination airport code (e.g. ``"FOC"``); the origin is
            fixed to ``"XMN"``.
    """
    dname = 'XMN'
    # The argument is a bare airport code, so the full URL has to be assembled.
    urls = (
        'https://www.xiamenair.com/zh-cn/nticket.html?tripType=OW&orgCodeArr%5B0%5D='
        + str(dname)
        + '&dstCodeArr%5B0%5D=' + str(aname)
        + '&orgDateArr%5B0%5D=' + now
        + '&dstDate=&isInter=false&adtNum=1&chdNum=0&JFCabinFirst=false&acntCd=&mode=Money&partner=false&jcgm=false'
    )

    # Absolute paths into the result page, shared by several lookups below.
    header_xpath = "/html/body/div/div[4]/div[2]/div/div/div[1]"
    route_xpath = header_xpath + "/div[2]/div/div[1]/span[1]/div/text()"
    current_day_xpath = header_xpath + "/div[1]/ul/li[4]/p[1]/text()"
    next_day_xpath = header_xpath + "/div[1]/ul/li[5]"

    chrome_options = Options()
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
    # Do not load images.
    chrome_options.add_experimental_option(
        "prefs", {"profile.managed_default_content_settings.images": 2})

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(urls)
        while True:
            # Snapshot the full page source and parse it once per day tab.
            response = etree.HTML(driver.page_source)
            print(u'全部加载完成')

            # Page-level values are identical for every row: look them up once.
            citys = _first_or_none(response.xpath(route_xpath))
            flightDate = _first_or_none(response.xpath(current_day_xpath))

            # Extract each flight row and print its fields one by one.
            for index, each in enumerate(response.xpath("//div[@class='left']")):
                time.sleep(5)
                if index == 0:  # the first match is an empty placeholder
                    continue
                print('ok')
                hangci = each.xpath(".//span[@class='flight-num']/text()")
                starttime = each.xpath(
                    ".//div[@class='start']/div[@class='time']/text()")
                endtime = each.xpath(
                    ".//div[@class='end']/div[@class='time']/text()")
                print(hangci)
                print(starttime)
                print(endtime)
                print(citys)
                print(flightDate)
                official_price = each.xpath(
                    ".//div[@class='flight-info clearfix']/div[@class='price']//strong/text()"
                )  # price
                print(official_price)

            # Label of the "next day" tab; stop once it reaches the end date.
            next_date = response.xpath(next_day_xpath + "/p[1]/text()")
            if _reached_end_date(next_date, endnow):
                break

            # Otherwise keep clicking through to the next day.
            next_tab = driver.find_element("xpath", next_day_xpath)
            ActionChains(driver).click(next_tab).perform()
            time.sleep(5)
    finally:
        # Release the browser on normal completion and on any error.
        driver.quit()


def _first_or_none(nodes):
    """Return the first XPath result, or ``None`` when nothing matched."""
    return nodes[0] if nodes else None


def _reached_end_date(date_texts, end_date):
    """Tell whether the scraped next-day label equals the configured end date.

    ``date_texts`` is the list of text nodes returned by XPath.  The legacy
    ``str(list)`` comparison is kept so any caller relying on it still stops.
    """
    if str(date_texts) == end_date:
        return True
    return any(str(text).strip() == str(end_date) for text in date_texts)
예제 #3
0
from selenium import webdriver
from selenium.webdriver import Chrome
from selenium.webdriver.chrome import options
from selenium.webdriver.chrome.options import Options
import json
import pprint
from time import sleep

# driver = webdriver.Chrome()
# driver.get('https://www.espn.com.br/futebol/classificacao/_/liga/BRA.1/ordenar/wins/dir/desce/temporada/2019')
# sleep(3)
# Load the ADVFN quote page for MOVI3 in headless Chrome and print every
# tooltip element found inside the `quote_top` container.
chrome_options = Options()
# The `--headless` argument works on every Selenium release; the
# `Options.headless` setter was removed in recent Selenium 4 versions.
chrome_options.add_argument("--headless")
# Chrome expects the window size as "width,height".
chrome_options.add_argument("--window-size=1920,1080")

op = Chrome(options=chrome_options)
try:
    op.get('https://br.advfn.com/bolsa-de-valores/bovespa/movida-on-MOVI3/cotacao')
    # The former argument-less `op.add_cookie()` call was dropped:
    # `add_cookie` requires a cookie dict, so it always raised TypeError
    # and nothing below it ever ran.

    # String locators ("id", "class name") replace the `find_element_by_*`
    # helpers, which were removed in Selenium 4.3.
    linhas = op.find_element('id', 'quote_top')
    for Html_linha in linhas.find_elements('class name', 'boxToolTip'):
        print(Html_linha)
finally:
    # quit() closes every window and ends the driver session, so a separate
    # close() is unnecessary; `finally` guarantees cleanup on errors too.
    op.quit()