def found_captcha(t_driver): doc = Doc(html=t_driver.page_source) iframe_url = "" for item in doc.q(iframeXPath): if item.x("@src") != "": iframe_url = item.x("@src").encode("utf-8").strip() print "URL -> ", iframe_url url = t_driver.current_url print "URL -> ", url if iframe_url != "": print("wait until it show response") WebDriverWait(t_driver, config.DRIVER_WAITING_SECONDS).until( AnyEc( ec.presence_of_element_located( (By.XPATH, "//textarea[contains(@id, 'g-recaptcha-response')]")), )) sitekey = "" try: sitekey = re.search("k\=(.*?)&", iframe_url).group(1) except Exception as e: print e pass print "SiteKey = ", sitekey if sitekey == "": common_lib.phantom_Quit(t_driver) exit() ret_value = solve_recaptcha(t_driver, "g-recaptcha-response", url, sitekey) print "Recatpcha = ", ret_value if (ret_value == config.RECAPTCHA_SOLVED): print "Click Checkbox after solve recaptcha" t_driver.switch_to_default_content() wait() recaptchaSubmitBtnObj = t_driver.find_element_by_xpath( recaptchaSubmitBtnXPath) actions = ActionChains(t_driver) actions.move_to_element(recaptchaSubmitBtnObj) actions.click(recaptchaSubmitBtnObj) actions.perform() wait_medium() return ret_value return None
def recreate_phantom(self): print "+++++++++++++++++ Recreate PhantomJS +++++++++++++++++" common_lib.phantom_Quit(self.phantom_obj["driver"]) driver, self.user_agent, proxy, self.screen_resolution = common_lib.create_phantomjs_driver() # PHANTOMJS PART self.phantom_obj["driver"] = driver
def parse_website(self, selenium_driver_type): self.class_type = config.CLASS_TYPE_TROPICAIR_STR self.parent_url = "https://www.tropicair.com/" driver = self.driver proxy = None try: # Use FF here is_ff = False # lock.acquire() # Get driver if selenium_driver_type == config.DRIVER_VALUE_CHROME: driver = common_lib.create_chrome_driver( ) # GOOGLE CHROME PART elif is_ff: driver = common_lib.create_firefox_driver() # FIREFOX PART elif selenium_driver_type == config.DRIVER_VALUE_PHANTOMJS: driver, self.user_agent, proxy, self.screen_resolution = common_lib.create_phantomjs_driver( ) # PHANTOMJS PART # lock.release() if driver is None: return no_result = 0 stop_day = "" for ind in range(0, len(self.date_list["date"])): date_item = self.date_list["date"][ind] print "Start Date", date_item print "No Result=", no_result self.start_date = { "date": date_item, "status": "none", "error_count": 0 } self.end_date = { 'date': self.start_date["date"] + timedelta(days=1), 'status': 'pending' } stop_day = date_item if no_result >= config.MAX_NO_RESULT_COUNT: print("--------------------------") print("No result any more") print("--------------------------") break bStop = False while bStop == False: try: # driver.get("http://lumtest.com/myip.json") # self.wait() # print driver.page_source # self.wait() print('loading parent page... TropicAir') driver.get(self.parent_url) self.wait_medium() # with open("html/response1.html", 'w') as f: # f.write(driver.page_source.encode('utf-8')) # driver.save_screenshot('screenshot/0.png') print("find iframe in div") WebDriverWait( driver, config.DRIVER_WAITING_SECONDS ).until( AnyEc( EC.presence_of_element_located( (By.XPATH, "//div[@class='tab-content-bg']//iframe" )), )) print("iframe was founded") iframe = driver.find_element_by_xpath( "//div[@class='tab-content-bg']//iframe") driver.switch_to_frame(iframe) cookies = driver.get_cookies() #doc = Doc(html=driver.page_source) # with open("response.html", 'w') as f: # f.write(driver.page_source.encode('utf-8')) print("Find round trip button") WebDriverWait( driver, config.DRIVER_WAITING_SECONDS ).until( AnyEc( EC.presence_of_element_located( (By.XPATH, "//button[contains(text(), 'Round trip')]" )), )) # driver.save_screenshot('screenshot/1.png') # with open("html/response.html", 'w') as f: # f.write(driver.page_source.encode('utf-8')) print("round trip tab click") round_trip_btn = driver.find_element_by_xpath( "//button[contains(text(), 'Round trip')]") round_trip_btn.click() self.wait() # driver.save_screenshot('screenshot/2.png') #self.save_select_departure_arrival(driver) success = False # print("**************************") # print(self.start_date) # print(self.end_date) print("Select start day -> {}".format( self.start_date["date"])) start_date_div = driver.find_element_by_id( "CalendarID0") start_date_div.send_keys("") success = self.select_date(driver, "DateTimePicker0", self.start_date["date"]) self.wait() if success == False: print "Error occurred in DateTimePicker0" self.page_error = config.ERROR_WEBSITE_PROBLEM else: print("Select end day -> {}".format( self.end_date["date"])) end_date_div = driver.find_element_by_id( "CalendarID1") end_date_div.send_keys("") success = self.select_date(driver, "DateTimePicker1", self.end_date["date"]) self.wait() if success == False: print "Error occurred in DateTimePicker1" self.page_error = config.ERROR_WEBSITE_PROBLEM else: # driver.save_screenshot('screenshot/3.png') success = self.select_departure_arrival(driver) if success == False: self.page_error = config.ERROR_WEBSITE_PROBLEM else: # driver.save_screenshot('screenshot/4.png') print("*************************") print("Click submit to get information") submit_btn = driver.find_element_by_xpath( "//button[contains(@class, 'search-criterias-submit')]" ) submit_btn.click() self.wait() print("Wait until show date information") WebDriverWait( driver, config.DRIVER_WAITING_SECONDS ).until( AnyEc( EC.presence_of_element_located(( By.XPATH, "//div[contains(@class, 'day-selection-bar-block dayResume')]" )), EC.presence_of_element_located(( By.XPATH, "//div[contains(@class, 'panel-body text-center')]/h2/span" )), )) # driver.save_screenshot('screenshot/5.png') no_result_div = None try: no_result_div = driver.find_element_by_xpath( "//div[contains(@class, 'panel-body text-center')]/h2/span" ) except Exception as e: pass #self.show_exception_detail(e) if no_result_div == None: print( " Call Parse Round Trip Function") # parse right div to get sum on round trip self.parse_round_trip(driver) else: print( "////////////////No Result/////////////////" ) print(no_result_div) print(self.start_date) no_result += 1 self.date_list["no_result"] += 1 print( "////////////////No Result/////////////////" ) self.page_error = config.ERROR_NONE except TimeoutException as ex: print('***********E1*************') self.show_exception_detail(ex) self.page_error = config.ERROR_TIMEOUT_EXCEPTION except Exception as e: print('***********E2*************') self.show_exception_detail(e) self.page_error = config.ERROR_WEBSITE_PROBLEM if self.page_error == config.ERROR_NONE: self.start_date["status"] = "complete" else: self.start_date["error_count"] += 1 if self.start_date[ "error_count"] >= config.TROPICAIR_SCRAPING_MAX_COUNT: self.start_date["status"] = "complete" else: self.start_date["status"] = "none" if self.start_date["status"] == "complete": bStop = True start_day = self.date_list["date"][0] try: end_day = self.date_list["date"][-1] except: end_day = "" self.date_list["status"] = "complete" lock.acquire() global_sc_obj.save([ "Departure", self.departure, "Arrival", self.arrival, "Start", start_day.strftime("%Y-%m-%d"), "End", end_day.strftime("%Y-%m-%d"), "Stop", stop_day.strftime("%Y-%m-%d"), "No Result", no_result, ], "export_{}.csv".format(self.class_type)) common_lib.phantom_Quit(driver) lock.release() except Exception as e: print "++++++++++++++++++++" self.date_list["error_count"] += 1 if self.date_list[ "error_count"] >= config.TROPICAIR_SCRAPING_MAX_COUNT: self.date_list["status"] = "complete" else: self.date_list["status"] = "none" self.show_exception_detail(e) common_lib.phantom_Quit(driver)
def run(): """Main function to run bot.""" url = "https://www.vinelink.com/" driver = common_lib.create_chrome_driver(True) try: driver.get("http://lumtest.com/myip.json") wait() driver.get(url) wait() print "Wait for Select" WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until( AnyEc( ec.presence_of_element_located((By.XPATH, selectStateXPath)), ec.presence_of_element_located((By.XPATH, errorXPath)), )) errorObj = None try: errorObj = driver.find_element_by_xpath(errorXPath) except Exception as e: print e pass print errorObj if errorObj is not None: print "Exit" exit() print "Input-> {}".format(url) selectStateObj = driver.find_element_by_xpath(selectStateXPath) print "Change Select option as california ..." for option in selectStateObj.find_elements_by_tag_name('option'): if "California" == option.text: option.click() break sleep(config.DRIVER_MEDIUM_WAITING_SECONDS) print "Wait for Offender Button" WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until( AnyEc(ec.presence_of_element_located( (By.XPATH, findOffenderXPath)))) print "click Offender Button" findOffenderObj = driver.find_element_by_xpath(findOffenderXPath) actions = ActionChains(driver) actions.move_to_element(findOffenderObj) actions.click(findOffenderObj) actions.perform() wait_medium() print "wait continue button" WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until( AnyEc(ec.presence_of_element_located((By.XPATH, continueXPath)))) print "click continue button" continueObj = driver.find_element_by_xpath(continueXPath) actions = ActionChains(driver) actions.move_to_element(continueObj) actions.click(continueObj) actions.perform() wait_medium() bookingNo = "5212364" print "wait offender Id" WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until( AnyEc(ec.presence_of_element_located((By.XPATH, offenderIdXPath)))) offenderIdObj = driver.find_element_by_xpath(offenderIdXPath) offenderIdObj.click() wait() offenderIdObj.send_keys(bookingNo) wait() searchBtnObj = driver.find_element_by_xpath(searchBtnXPath) actions = ActionChains(driver) actions.move_to_element(searchBtnObj) actions.click(searchBtnObj) actions.perform() wait() print "wait for IFRAME RECAPTCHA" WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until( AnyEc(ec.presence_of_element_located((By.XPATH, iframeXPath)))) ret_value = found_captcha(driver) if (ret_value == config.RECAPTCHA_SOLVED): # MILLER,CALVIN RAY # LAST, FIRST, MIDDLE # DAVIS, CHANEL # with open("response.html", 'w') as f: # f.write(driver.page_source.encode("utf-8")) # exit() name = "" bFirst = True for i, missing_id in enumerate(missing_id_list): try: doc = Doc(html=driver.page_source) iframe_url = "" for item in doc.q(iframeXPath): if item.x("@src") != "": iframe_url = item.x("@src").encode("utf-8").strip() with open("response_found.html", 'w') as f: f.write(driver.page_source.encode("utf-8")) if iframe_url != "": print "Re-Captcha Found" # print("wait until it show response") # WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until( # AnyEc( # ec.presence_of_element_located( # (By.XPATH, "//textarea[@id='g-recaptcha-response']") # ), # ) # ) # sitekey = "" # try: # sitekey = re.search("k\=(.*?)&", iframe_url).group(1) # except Exception as e: # print e # pass # print "SiteKey = ", sitekey # if sitekey == "": # common_lib.phantom_Quit(t_driver) # exit() # ret_value = solve_recaptcha(driver, "g-recaptcha-response", url, sitekey) # print "Recatpcha = ", ret_value # if (ret_value == config.RECAPTCHA_SOLVED): # print "Click Checkbox after solve recaptcha" ret_value = found_captcha(driver) if (ret_value != config.RECAPTCHA_SOLVED): common_lib.phantom_Quit(driver) exit() # bPass = True # if iframe_url != "": # ret_value = found_captcha(driver) # if ret_value != config.RECAPTCHA_SOLVED: # bPass = False # if bPass == False: # print "Captcha Error" # exit() btnObj = driver.find_element_by_xpath( "//span[contains(@label, 'Search again')]") try: name = driver.find_element_by_xpath( "//div[@label='Offender Name']").get_attribute( "value") # print " Found Name -> ", name, " bFirst =", bFirst missing_id = missing_id_list[i - 1] if (name != "") and (bFirst == False): print "-------------------> Name = ", name, " BookingNo =", missing_id LastName = name.split(",")[0] name_str = name.split(",")[-1].strip() FirstName = name_str.split(" ")[0] booking_history = DashboardVineName( s_bookingno=missing_id, s_lastname=LastName, s_firstname=FirstName, s_captureddate=currentdate, s_capturedtime=currenttime, s_duplication=0) try: db.session.add(booking_history) db.session.commit() except Exception as e: print e except Exception as e: print e pass actions = ActionChains(driver) actions.move_to_element(btnObj) actions.click(btnObj) actions.perform() wait_medium() except: pass bFirst = False # print "wait offender Id", i messageXpath = "//span[contains(text(), 'No offenders matching your criteria were found')]" WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until( AnyEc( ec.presence_of_element_located( (By.XPATH, messageXpath)), ec.presence_of_element_located( (By.XPATH, offenderIdXPath)), )) print "input offender id -> ", missing_id offenderIdObj = driver.find_element_by_xpath(offenderIdXPath) offenderIdObj.clear() wait() offenderIdObj.send_keys(str(missing_id)) wait() searchBtnObj = driver.find_element_by_xpath(searchBtnXPath) actions = ActionChains(driver) actions.move_to_element(searchBtnObj) actions.click(searchBtnObj) actions.perform() wait_medium() print "*********************************" print "Completed" print "*********************************" except Exception as e: # common_lib.phantom_Quit(driver) print e