Example #1
0
def found_captcha(t_driver):
    doc = Doc(html=t_driver.page_source)

    iframe_url = ""
    for item in doc.q(iframeXPath):
        if item.x("@src") != "":
            iframe_url = item.x("@src").encode("utf-8").strip()

    print "URL -> ", iframe_url

    url = t_driver.current_url
    print "URL -> ", url

    if iframe_url != "":
        print("wait until it show response")
        WebDriverWait(t_driver, config.DRIVER_WAITING_SECONDS).until(
            AnyEc(
                ec.presence_of_element_located(
                    (By.XPATH,
                     "//textarea[contains(@id, 'g-recaptcha-response')]")), ))

        sitekey = ""
        try:
            sitekey = re.search("k\=(.*?)&", iframe_url).group(1)
        except Exception as e:
            print e
            pass

        print "SiteKey = ", sitekey

        if sitekey == "":
            common_lib.phantom_Quit(t_driver)
            exit()

        ret_value = solve_recaptcha(t_driver, "g-recaptcha-response", url,
                                    sitekey)
        print "Recatpcha = ", ret_value

        if (ret_value == config.RECAPTCHA_SOLVED):
            print "Click Checkbox after solve recaptcha"

            t_driver.switch_to_default_content()
            wait()

            recaptchaSubmitBtnObj = t_driver.find_element_by_xpath(
                recaptchaSubmitBtnXPath)
            actions = ActionChains(t_driver)
            actions.move_to_element(recaptchaSubmitBtnObj)
            actions.click(recaptchaSubmitBtnObj)
            actions.perform()
            wait_medium()

        return ret_value

    return None
    def recreate_phantom(self):
        """Quit the current PhantomJS driver and install a freshly created one.

        The user agent and screen resolution returned by
        ``common_lib.create_phantomjs_driver`` are stored on the instance;
        the proxy it also returns is not kept.
        """
        print("+++++++++++++++++ Recreate PhantomJS +++++++++++++++++")
        stale_driver = self.phantom_obj["driver"]
        common_lib.phantom_Quit(stale_driver)

        # PHANTOMJS PART
        fresh_driver, agent, _proxy, resolution = common_lib.create_phantomjs_driver()
        self.user_agent = agent
        self.screen_resolution = resolution
        self.phantom_obj["driver"] = fresh_driver
Example #3
0
    def parse_website(self, selenium_driver_type):
        self.class_type = config.CLASS_TYPE_TROPICAIR_STR

        self.parent_url = "https://www.tropicair.com/"

        driver = self.driver
        proxy = None
        try:
            # Use FF here
            is_ff = False

            # lock.acquire()

            # Get driver
            if selenium_driver_type == config.DRIVER_VALUE_CHROME:
                driver = common_lib.create_chrome_driver(
                )  # GOOGLE CHROME PART
            elif is_ff:
                driver = common_lib.create_firefox_driver()  # FIREFOX PART
            elif selenium_driver_type == config.DRIVER_VALUE_PHANTOMJS:
                driver, self.user_agent, proxy, self.screen_resolution = common_lib.create_phantomjs_driver(
                )  # PHANTOMJS PART

            # lock.release()
            if driver is None:
                return

            no_result = 0
            stop_day = ""

            for ind in range(0, len(self.date_list["date"])):
                date_item = self.date_list["date"][ind]
                print "Start Date", date_item
                print "No Result=", no_result

                self.start_date = {
                    "date": date_item,
                    "status": "none",
                    "error_count": 0
                }
                self.end_date = {
                    'date': self.start_date["date"] + timedelta(days=1),
                    'status': 'pending'
                }

                stop_day = date_item
                if no_result >= config.MAX_NO_RESULT_COUNT:
                    print("--------------------------")
                    print("No result any more")
                    print("--------------------------")
                    break

                bStop = False
                while bStop == False:

                    try:
                        # driver.get("http://lumtest.com/myip.json")
                        # self.wait()

                        # print driver.page_source
                        # self.wait()

                        print('loading parent page... TropicAir')

                        driver.get(self.parent_url)
                        self.wait_medium()
                        # with open("html/response1.html", 'w') as f:
                        #     f.write(driver.page_source.encode('utf-8'))
                        # driver.save_screenshot('screenshot/0.png')

                        print("find iframe in div")
                        WebDriverWait(
                            driver, config.DRIVER_WAITING_SECONDS
                        ).until(
                            AnyEc(
                                EC.presence_of_element_located(
                                    (By.XPATH,
                                     "//div[@class='tab-content-bg']//iframe"
                                     )), ))

                        print("iframe was founded")
                        iframe = driver.find_element_by_xpath(
                            "//div[@class='tab-content-bg']//iframe")
                        driver.switch_to_frame(iframe)
                        cookies = driver.get_cookies()
                        #doc = Doc(html=driver.page_source)
                        # with open("response.html", 'w') as f:
                        #     f.write(driver.page_source.encode('utf-8'))

                        print("Find round trip button")
                        WebDriverWait(
                            driver, config.DRIVER_WAITING_SECONDS
                        ).until(
                            AnyEc(
                                EC.presence_of_element_located(
                                    (By.XPATH,
                                     "//button[contains(text(), 'Round trip')]"
                                     )), ))

                        # driver.save_screenshot('screenshot/1.png')
                        # with open("html/response.html", 'w') as f:
                        #     f.write(driver.page_source.encode('utf-8'))

                        print("round trip tab click")
                        round_trip_btn = driver.find_element_by_xpath(
                            "//button[contains(text(), 'Round trip')]")
                        round_trip_btn.click()
                        self.wait()
                        # driver.save_screenshot('screenshot/2.png')
                        #self.save_select_departure_arrival(driver)

                        success = False

                        # print("**************************")
                        # print(self.start_date)
                        # print(self.end_date)

                        print("Select start day -> {}".format(
                            self.start_date["date"]))
                        start_date_div = driver.find_element_by_id(
                            "CalendarID0")
                        start_date_div.send_keys("")
                        success = self.select_date(driver, "DateTimePicker0",
                                                   self.start_date["date"])
                        self.wait()

                        if success == False:
                            print "Error occurred in DateTimePicker0"
                            self.page_error = config.ERROR_WEBSITE_PROBLEM
                        else:
                            print("Select end day -> {}".format(
                                self.end_date["date"]))
                            end_date_div = driver.find_element_by_id(
                                "CalendarID1")
                            end_date_div.send_keys("")
                            success = self.select_date(driver,
                                                       "DateTimePicker1",
                                                       self.end_date["date"])
                            self.wait()

                            if success == False:
                                print "Error occurred in DateTimePicker1"
                                self.page_error = config.ERROR_WEBSITE_PROBLEM
                            else:
                                # driver.save_screenshot('screenshot/3.png')
                                success = self.select_departure_arrival(driver)
                                if success == False:
                                    self.page_error = config.ERROR_WEBSITE_PROBLEM
                                else:
                                    # driver.save_screenshot('screenshot/4.png')
                                    print("*************************")
                                    print("Click submit to get information")
                                    submit_btn = driver.find_element_by_xpath(
                                        "//button[contains(@class, 'search-criterias-submit')]"
                                    )
                                    submit_btn.click()
                                    self.wait()

                                    print("Wait until show date information")
                                    WebDriverWait(
                                        driver, config.DRIVER_WAITING_SECONDS
                                    ).until(
                                        AnyEc(
                                            EC.presence_of_element_located((
                                                By.XPATH,
                                                "//div[contains(@class, 'day-selection-bar-block dayResume')]"
                                            )),
                                            EC.presence_of_element_located((
                                                By.XPATH,
                                                "//div[contains(@class, 'panel-body text-center')]/h2/span"
                                            )),
                                        ))
                                    # driver.save_screenshot('screenshot/5.png')
                                    no_result_div = None
                                    try:
                                        no_result_div = driver.find_element_by_xpath(
                                            "//div[contains(@class, 'panel-body text-center')]/h2/span"
                                        )
                                    except Exception as e:
                                        pass
                                        #self.show_exception_detail(e)

                                    if no_result_div == None:
                                        print(
                                            " Call Parse Round Trip Function")
                                        # parse right div to get sum on round trip
                                        self.parse_round_trip(driver)
                                    else:
                                        print(
                                            "////////////////No Result/////////////////"
                                        )
                                        print(no_result_div)
                                        print(self.start_date)
                                        no_result += 1
                                        self.date_list["no_result"] += 1
                                        print(
                                            "////////////////No Result/////////////////"
                                        )

                                    self.page_error = config.ERROR_NONE
                    except TimeoutException as ex:
                        print('***********E1*************')
                        self.show_exception_detail(ex)
                        self.page_error = config.ERROR_TIMEOUT_EXCEPTION
                    except Exception as e:
                        print('***********E2*************')
                        self.show_exception_detail(e)
                        self.page_error = config.ERROR_WEBSITE_PROBLEM

                    if self.page_error == config.ERROR_NONE:
                        self.start_date["status"] = "complete"
                    else:
                        self.start_date["error_count"] += 1

                        if self.start_date[
                                "error_count"] >= config.TROPICAIR_SCRAPING_MAX_COUNT:
                            self.start_date["status"] = "complete"
                        else:
                            self.start_date["status"] = "none"

                    if self.start_date["status"] == "complete":
                        bStop = True

            start_day = self.date_list["date"][0]
            try:
                end_day = self.date_list["date"][-1]
            except:
                end_day = ""

            self.date_list["status"] = "complete"

            lock.acquire()
            global_sc_obj.save([
                "Departure",
                self.departure,
                "Arrival",
                self.arrival,
                "Start",
                start_day.strftime("%Y-%m-%d"),
                "End",
                end_day.strftime("%Y-%m-%d"),
                "Stop",
                stop_day.strftime("%Y-%m-%d"),
                "No Result",
                no_result,
            ], "export_{}.csv".format(self.class_type))

            common_lib.phantom_Quit(driver)
            lock.release()

        except Exception as e:
            print "++++++++++++++++++++"
            self.date_list["error_count"] += 1

            if self.date_list[
                    "error_count"] >= config.TROPICAIR_SCRAPING_MAX_COUNT:
                self.date_list["status"] = "complete"
            else:
                self.date_list["status"] = "none"

            self.show_exception_detail(e)
            common_lib.phantom_Quit(driver)
Example #4
0
def run():
    """Main function to run bot."""
    url = "https://www.vinelink.com/"
    driver = common_lib.create_chrome_driver(True)
    try:

        driver.get("http://lumtest.com/myip.json")
        wait()

        driver.get(url)
        wait()

        print "Wait for Select"
        WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until(
            AnyEc(
                ec.presence_of_element_located((By.XPATH, selectStateXPath)),
                ec.presence_of_element_located((By.XPATH, errorXPath)),
            ))

        errorObj = None
        try:
            errorObj = driver.find_element_by_xpath(errorXPath)
        except Exception as e:
            print e
            pass

        print errorObj
        if errorObj is not None:
            print "Exit"
            exit()

        print "Input-> {}".format(url)

        selectStateObj = driver.find_element_by_xpath(selectStateXPath)

        print "Change Select option as california ..."
        for option in selectStateObj.find_elements_by_tag_name('option'):
            if "California" == option.text:
                option.click()
                break

        sleep(config.DRIVER_MEDIUM_WAITING_SECONDS)

        print "Wait for Offender Button"
        WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until(
            AnyEc(ec.presence_of_element_located(
                (By.XPATH, findOffenderXPath))))

        print "click Offender Button"
        findOffenderObj = driver.find_element_by_xpath(findOffenderXPath)
        actions = ActionChains(driver)
        actions.move_to_element(findOffenderObj)
        actions.click(findOffenderObj)
        actions.perform()
        wait_medium()

        print "wait continue button"
        WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until(
            AnyEc(ec.presence_of_element_located((By.XPATH, continueXPath))))
        print "click continue button"
        continueObj = driver.find_element_by_xpath(continueXPath)
        actions = ActionChains(driver)
        actions.move_to_element(continueObj)
        actions.click(continueObj)
        actions.perform()
        wait_medium()

        bookingNo = "5212364"

        print "wait offender Id"
        WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until(
            AnyEc(ec.presence_of_element_located((By.XPATH, offenderIdXPath))))

        offenderIdObj = driver.find_element_by_xpath(offenderIdXPath)
        offenderIdObj.click()
        wait()

        offenderIdObj.send_keys(bookingNo)
        wait()

        searchBtnObj = driver.find_element_by_xpath(searchBtnXPath)
        actions = ActionChains(driver)
        actions.move_to_element(searchBtnObj)
        actions.click(searchBtnObj)
        actions.perform()
        wait()

        print "wait for IFRAME RECAPTCHA"
        WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until(
            AnyEc(ec.presence_of_element_located((By.XPATH, iframeXPath))))

        ret_value = found_captcha(driver)

        if (ret_value == config.RECAPTCHA_SOLVED):

            # MILLER,CALVIN  RAY
            # LAST, FIRST, MIDDLE

            # DAVIS, CHANEL
            # with open("response.html", 'w') as f:
            #     f.write(driver.page_source.encode("utf-8"))

            # exit()

            name = ""
            bFirst = True

            for i, missing_id in enumerate(missing_id_list):
                try:
                    doc = Doc(html=driver.page_source)

                    iframe_url = ""
                    for item in doc.q(iframeXPath):
                        if item.x("@src") != "":
                            iframe_url = item.x("@src").encode("utf-8").strip()

                    with open("response_found.html", 'w') as f:
                        f.write(driver.page_source.encode("utf-8"))

                    if iframe_url != "":
                        print "Re-Captcha Found"

                        # print("wait until it show response")
                        # WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until(
                        #     AnyEc(
                        #         ec.presence_of_element_located(
                        #             (By.XPATH, "//textarea[@id='g-recaptcha-response']")
                        #         ),
                        #     )
                        # )

                        # sitekey = ""
                        # try:
                        #     sitekey = re.search("k\=(.*?)&", iframe_url).group(1)
                        # except Exception as e:
                        #     print e
                        #     pass

                        # print "SiteKey = ", sitekey

                        # if sitekey == "":
                        #     common_lib.phantom_Quit(t_driver)
                        #     exit()

                        # ret_value = solve_recaptcha(driver, "g-recaptcha-response", url, sitekey)
                        # print "Recatpcha = ", ret_value

                        # if (ret_value == config.RECAPTCHA_SOLVED):
                        #     print "Click Checkbox after solve recaptcha"

                        ret_value = found_captcha(driver)
                        if (ret_value != config.RECAPTCHA_SOLVED):
                            common_lib.phantom_Quit(driver)
                            exit()

                    # bPass = True

                    # if iframe_url != "":
                    #     ret_value = found_captcha(driver)

                    #     if ret_value != config.RECAPTCHA_SOLVED:
                    #         bPass = False

                    # if bPass == False:
                    #     print "Captcha Error"
                    #     exit()

                    btnObj = driver.find_element_by_xpath(
                        "//span[contains(@label, 'Search again')]")

                    try:
                        name = driver.find_element_by_xpath(
                            "//div[@label='Offender Name']").get_attribute(
                                "value")
                        # print " Found Name -> ", name, " bFirst =", bFirst
                        missing_id = missing_id_list[i - 1]

                        if (name != "") and (bFirst == False):
                            print "-------------------> Name = ", name, " BookingNo =", missing_id

                            LastName = name.split(",")[0]
                            name_str = name.split(",")[-1].strip()
                            FirstName = name_str.split(" ")[0]

                            booking_history = DashboardVineName(
                                s_bookingno=missing_id,
                                s_lastname=LastName,
                                s_firstname=FirstName,
                                s_captureddate=currentdate,
                                s_capturedtime=currenttime,
                                s_duplication=0)

                            try:
                                db.session.add(booking_history)
                                db.session.commit()
                            except Exception as e:
                                print e

                    except Exception as e:
                        print e
                        pass

                    actions = ActionChains(driver)
                    actions.move_to_element(btnObj)
                    actions.click(btnObj)
                    actions.perform()
                    wait_medium()
                except:
                    pass

                bFirst = False
                # print "wait offender Id", i
                messageXpath = "//span[contains(text(), 'No offenders matching your criteria were found')]"

                WebDriverWait(driver, config.DRIVER_WAITING_SECONDS).until(
                    AnyEc(
                        ec.presence_of_element_located(
                            (By.XPATH, messageXpath)),
                        ec.presence_of_element_located(
                            (By.XPATH, offenderIdXPath)),
                    ))

                print "input offender id -> ", missing_id
                offenderIdObj = driver.find_element_by_xpath(offenderIdXPath)
                offenderIdObj.clear()
                wait()
                offenderIdObj.send_keys(str(missing_id))
                wait()

                searchBtnObj = driver.find_element_by_xpath(searchBtnXPath)
                actions = ActionChains(driver)
                actions.move_to_element(searchBtnObj)
                actions.click(searchBtnObj)
                actions.perform()
                wait_medium()

            print "*********************************"
            print "Completed"
            print "*********************************"

    except Exception as e:
        # common_lib.phantom_Quit(driver)
        print e