def runSkyScannerHK():
    """Validate the search parameters held in ``data`` and start the task.

    Reads ``returnDate``, ``departDate``, ``departCityCode`` and
    ``arriveCityCode`` from ``data``. If any of them is missing or empty,
    logs "invalid param" and returns without doing anything else. Otherwise
    hands ``data`` to ``return_flight.setParamData`` and then calls
    ``return_flight.runTask``.
    """
    log.v("start to run")
    required = (
        data.returnDate,
        data.departDate,
        data.departCityCode,
        data.arriveCityCode,
    )
    # Truthiness check instead of chaining `x.__eq__("") | ...`: for a
    # non-str value such as None, `__eq__` returns NotImplemented and the
    # bitwise `|` then raises TypeError instead of reporting a bad parameter.
    if not all(required):
        log.v("invalid param")
        return
    return_flight.setParamData(data)
    return_flight.runTask()
def runTask():
    """Create the output CSV, open the search URL in Firefox and scrape it.

    The CSV name is "<departCity>-<arriveCity>-<departDate>-<returnDate>-
    <unix timestamp>", built from the fields of ``param``. The name is stored
    on ``csvUtil.filename`` and passed to ``csvUtil.createReturnFile``. The
    page returned by ``param.createReturnUrl()`` is then loaded and handed to
    ``getInfo``.
    """
    filename = "-".join([
        param.departCityCode,
        param.arriveCityCode,
        param.departDate,
        param.returnDate,
        str(int(time.mktime(datetime.datetime.now().timetuple()))),
    ])
    csvUtil.filename = filename
    csvUtil.createReturnFile(filename)
    log.v("csv fire created, now waiting for internet response")

    # Earlier experiments (Chrome, optionally behind a proxy at
    # 127.0.0.1:1080, and an explicit WebDriverWait for the "article"
    # elements) were disabled in favour of Firefox plus a fixed sleep.
    firefox_profile = webdriver.FirefoxProfile()
    firefox_profile.native_events_enabled = True
    browser = webdriver.Firefox(firefox_profile)
    browser.get(param.createReturnUrl())

    # Fixed pause to let the results page finish loading.
    time.sleep(50)
    log.v("end driver wait")

    getInfo(browser)
    time.sleep(200)
def setReturnParams(departure, destination, departDate, returnDate):
    """Store the return-trip search parameters on ``data`` and log them.

    Args:
        departure: value stored as ``data.departCityCode``.
        destination: value stored as ``data.arriveCityCode``.
        departDate: value stored as ``data.departDate``.
        returnDate: value stored as ``data.returnDate``.
    """
    data.departCityCode = departure
    data.arriveCityCode = destination
    data.departDate = departDate
    data.returnDate = returnDate

    # Echo the received values between two separator lines.
    log.printLine()
    for label, value in (
        ("departure", departure),
        ("destination", destination),
        ("departDate", departDate),
        ("returnDate", returnDate),
    ):
        log.i(label, value)
    log.printLine()
    log.v("set params done")
def check_web(departure, arrival):
    """Open ``url`` in Chrome through the first configured proxy and scrape it.

    The proxy address is ``ip_list.ips[0]``. After the page is requested the
    function sleeps for a fixed 30 seconds, passes the driver to ``getInfo``
    and always quits the driver afterwards.

    Args:
        departure: accepted but not used by this function.
        arrival: accepted but not used by this function.

    NOTE(review): ``url`` is not defined in this function; it is presumably a
    module-level name -- confirm.
    """
    log.v("start running")

    # Build the driver *before* entering the try block. Previously this setup
    # lived inside it, so a failure here (empty proxy list, Chrome not
    # starting) reached `finally` with `driver` unbound and the resulting
    # UnboundLocalError hid the real error.
    proxy = ip_list.ips[0]
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--proxy-server={0}'.format(proxy))
    driver = webdriver.Chrome(chrome_options=chrome_options)

    try:
        driver.get(url)
        log.v("end driver waiting")
        # Fixed pause used instead of an explicit wait for "day-list-item".
        time.sleep(30)
        log.v("end waiting")
        getInfo(driver)
    finally:
        # Always release the browser, even when scraping fails.
        time.sleep(5)
        driver.quit()
    log.v("end")
def getInfo(driver):
    """Open every "day-list-item" result and log the details shown for it.

    For each item: click it, locate the "details-mobile-panel" element, click
    and log every "itinerary-leg" (operating airlines, departure and arrival
    time/airport), log every "price" element in the panel, then click the
    "fss-overlay" element before moving on to the next item.

    Args:
        driver: Selenium WebDriver with the results page already loaded.
    """
    day_items = driver.find_elements_by_class_name("day-list-item")
    log.v("start")

    # (element class, label for the time, label for the airport)
    endpoints = (
        ("departure", "depart time", "depart airport"),
        ("destination", "arrive time", "arrive airport"),
    )

    for item in day_items:
        log.v("get articles")
        time.sleep(5)
        item.click()
        log.v("click open")
        time.sleep(5)
        panel = driver.find_element_by_id("details-mobile-panel")
        log.v("found details-mobile-panel")

        for leg in panel.find_elements_by_class_name("itinerary-leg"):
            time.sleep(5)
            leg.click()
            log.v("click leg")
            for carrier in leg.find_elements_by_class_name("operated-by"):
                log.i("airline", carrier.text)
            for css_class, time_label, airport_label in endpoints:
                endpoint = leg.find_element_by_class_name(css_class)
                log.i(time_label,
                      endpoint.find_element_by_class_name("times").text)
                log.i(airport_label,
                      endpoint.find_element_by_class_name("route").text)

        for price in panel.find_elements_by_class_name("price"):
            log.i("price", price.text)

        log.v("click legs")
        time.sleep(5)
        driver.find_element_by_id("fss-overlay").click()
        log.v("click close")
from webspider.src.spider.wsdomestic import check_web
from webspider.src.spider import skyscanner
from webspider.src.logger import log
from webspider.src import run_spider

# Main entry point of the program.
# check_web("")

# Emit one test message through each of the logger's w/e/v/i calls.
log.w("test Warning")
log.e("test Error")
log.v("test Verbose")
log.i("test Info", "info")
# skyscanner.check_web("", "")

# Search parameters for the return-trip query.
departure = "can"  # departure airport code
destination = "sha"  # destination airport code
departDate = "170613"  # departure date (format: YYMMDD)
returnDate = "170701"  # return date (format: YYMMDD)

# Store the parameters, then validate them and start the scraping task.
run_spider.setReturnParams(departure, destination, departDate, returnDate)
run_spider.runSkyScannerHK()
def main():
    """Log a start-up message, pass ``data`` to the task and run it."""
    log.v("program is starting and waiting for web respond")
    flight_task = return_flight
    flight_task.setParamData(data)
    flight_task.runTask()
def getInfo(driver):
    """Scrape every ``article`` on the current results page, then paginate.

    For each article the airline, times and airports of its sections are
    copied onto ``return_entity`` fields, followed by the "mainquote-price"
    text; the entity is printed and written with
    ``csvUtil.writeReturnEntity``. Afterwards the "next" button inside
    "day-list-pagination" is clicked and this function calls itself for the
    following page. Any exception raised in that pagination step -- including
    one propagating out of the recursive call -- logs "finish" and quits the
    driver.

    Args:
        driver: Selenium WebDriver with a results page already loaded.
    """
    articles = driver.find_elements_by_css_selector("article")
    log.v("found articles")
    time.sleep(5)
    log.v("end wait")
    # articles = driver.find_elements_by_class_name("day-list-item")
    log.v("start")
    log.v("get articles")
    for article in articles:
        # NOTE(review): this binds the name `return_entity` itself rather than
        # creating a new object, so every article appears to share one entity
        # and fields not overwritten keep the previous article's values --
        # confirm what `return_entity` is.
        entity = return_entity
        log.v("article start")
        time.sleep(2)
        sections = article.find_elements_by_css_selector("section")
        log.v("found sections")
        # Number of sections fully processed so far: 0 fills the depart*
        # fields, any other value fills the return* fields.
        count = 0
        for section in sections:
            # time.sleep(3)
            try:
                bigairline = section.find_element_by_class_name("big-airline")
                log.v("found big-airline")
            except:
                # Bare except: any failure here skips the section without
                # advancing `count`.
                continue
            # time.sleep(1)
            try:
                # First choice: airline name from the "text-sm" element's text.
                airline = bigairline.find_element_by_class_name("text-sm")
                log.v(airline.text)
                if count == 0:
                    entity.departairline = airline.text
                else:
                    entity.returnairline = airline.text
                log.v("found text-sm")
            except:
                try:
                    # Fallback: airline name from the "alt" attribute of the
                    # "big" element.
                    airline = bigairline.find_element_by_class_name("big")
                    log.v(airline.get_attribute("alt"))
                    if count == 0:
                        entity.departairline = airline.get_attribute("alt")
                    else:
                        entity.returnairline = airline.get_attribute("alt")
                    log.v("found img")
                except:
                    # Neither lookup worked: skip the section (the `finally`
                    # below still logs the current count first).
                    continue
            finally:
                log.v("count:" + str(count))
            try:
                # time.sleep(1)
                stations = section.find_elements_by_class_name("station-tooltip")
                log.v("found station-tooltip")
                # Index 0 is stored as the departure end and index 1 as the
                # arrival end; fewer than two stations raises and is swallowed
                # by the bare except below.
                for i in range(2):
                    times = stations.__getitem__(i).find_element_by_class_name("times")
                    airport = stations.__getitem__(i).find_element_by_class_name("stop-station")
                    log.v("time:" + times.text)
                    log.v("airport:" + airport.text)
                    if count == 0:
                        if i == 0 :
                            entity.departtime = times.text
                            entity.departairport = airport.text
                        else:
                            entity.arrivetime = times.text
                            entity.arriveairport = airport.text
                    else:
                        if i == 0 :
                            entity.returndeparttime = times.text
                            entity.returndepartairport = airport.text
                        else:
                            entity.returnarrivetime = times.text
                            entity.returnarriveairport = airport.text
            except:
                continue
            finally:
                # Runs whether or not station parsing succeeded, so `count`
                # advances for every section that reached this point.
                count = count + 1
                log.v("section end")
        # Not guarded by a try: a missing price element raises out of this
        # call.
        price = article.find_element_by_class_name("mainquote-price")
        log.v("price:"+price.text)
        entity.cheapestprice1 = price.text
        entity.print()
        csvUtil.writeReturnEntity(entity)
        log.v("article end")
    log.v("page end")
    try:
        # NOTE(review): `next` shadows the builtin of the same name inside
        # this function.
        next = driver.find_element_by_class_name("day-list-pagination").find_element_by_class_name("next").find_element_by_css_selector("button")
        log.v("found next button end")
        next.click()
        log.v("go to next page")
        time.sleep(2)
        # One recursive call per results page.
        getInfo(driver)
    except:
        # Bare except: reached when the next button is missing, and also for
        # any other error raised above in this try or inside the recursive
        # call.
        log.v("finish")
        driver.quit()
def setParamData(data):
    """Install ``data`` as the module-level ``param`` and log its return URL.

    Args:
        data: parameter object; must provide ``createReturnUrl()``.
    """
    # Without this declaration the assignments below only create function
    # locals that vanish on return, so the call would not change the
    # module-level `param` that other functions in this module read.
    global param, mUrl
    param = data
    mUrl = data.createReturnUrl()
    log.v(mUrl)