def Buda_ingreso(email=None, password=None, wait_seconds=120):
    """Open the Buda sign-in page, submit the login form and keep the session open.

    :param email: account e-mail; when omitted, the ``BUDA_EMAIL`` environment
        variable is used, then the legacy built-in value
    :param password: account password; when omitted, the ``BUDA_PASSWORD``
        environment variable is used, then the legacy built-in value
    :param wait_seconds: seconds to sleep after clicking the sign-in button,
        defaults to 120
    :return: None
    """
    import os

    # SECURITY: the literal fallbacks below are kept only so existing callers of
    # ``Buda_ingreso()`` keep working. A password committed to source control
    # must be considered leaked: rotate it and drop these fallbacks.
    if email is None:
        email = os.environ.get("BUDA_EMAIL", '*****@*****.**')
    if password is None:
        password = os.environ.get("BUDA_PASSWORD", 'yrg3t6j9D')

    lib = Selenium()
    lib.open_available_browser('https://www.buda.com/ingreso')
    lib.input_text("id:user_email", email)
    # ``input_password`` types the value like ``input_text`` but keeps the
    # secret out of the library's log output.
    lib.input_password("id:user_password", password)
    lib.wait_and_click_button('xpath://button[@ng-show="!signinForm.$validating"]')
    # NOTE(review): presumably leaves time for the post-login page (or a manual
    # verification step) to complete -- confirm the intent of this fixed delay.
    time.sleep(wait_seconds)
    return
def _wait_response(self, window_width: int, window_height: int, timeout: Optional[int]) -> Dict:
    """Open a browser to the created form and wait for the user to submit it.

    The server at ``self.server_address`` is polled on ``/requestresponse``
    until it answers with status 200.

    :param window_width: browser window width in pixels
    :param window_height: browser window height in pixels
    :param timeout: maximum number of seconds to wait; a falsy value
        (``None`` or ``0``) disables the limit
    :return: the JSON-decoded body of the 200 response
    :raises RuntimeError: if the browser window is closed, or if no response
        arrives within ``timeout`` seconds
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status
    """
    browser = Selenium()

    def is_browser_open():
        # Any failure while querying the driver is treated as "window gone".
        try:
            return bool(browser.driver.current_window_handle)
        except Exception:  # pylint: disable=broad-except
            return False

    try:
        browser.open_available_browser(self.server_address)
        browser.set_window_size(window_width, window_height)

        # A monotonic clock keeps the deadline immune to wall-clock changes.
        deadline = time.monotonic() + int(timeout) if timeout else None

        while True:
            response = requests.get(
                f"{self.server_address}/requestresponse",
                headers={"Prefer": "wait=120"},
            )
            if response.status_code == 200:
                return response.json()
            if response.status_code != 304:
                # Raises for 4xx/5xx; other unexpected statuses fall through
                # to the checks below instead of re-polling in a tight loop.
                response.raise_for_status()

            # These checks now run on every non-200 answer, not only on 304,
            # so the loop can always terminate and never spins without a pause.
            if not is_browser_open():
                raise RuntimeError("Browser closed by user")
            if deadline is not None and time.monotonic() > deadline:
                raise RuntimeError("No response within timeout")
            time.sleep(1)
    finally:
        browser.close_browser()
class WebProcess:
    """Coordinate the browser, Excel and PDF steps of an agency scraping run.

    The instance owns one browser (``Selenium``) and one ``ExcelUtility``
    bound to the output workbook, and keeps the scraped agencies and the
    investment rows of the currently selected agency in memory.
    """

    def __init__(self, url: str, directories, path_to_excel_output_file):
        """Store the run configuration and create the browser and Excel helpers.

        ``directories`` is indexed with the ``'output'`` key by other methods.
        """
        self.url = url
        self.directories = directories
        self.agencies = []
        self.selected_agency = None
        self.individual_investments_of_agency = []
        self.browser = Selenium()
        self.excel = ExcelUtility(path_to_excel_output_file)

    def set_and_open_the_website(self):
        """Point downloads at the output directory, open ``self.url``, maximize."""
        output_dir = self.directories['output']
        self.browser.set_download_directory(directory=output_dir)
        self.browser.open_available_browser(self.url)
        self.browser.maximize_browser_window()

    def close_the_website(self):
        """Close every browser opened through ``self.browser``."""
        self.browser.close_all_browsers()

    def scrape_and_get_agencies_data(self):
        """Scrape the agencies from the main page and write them to Excel."""
        landing_page = MainPageObject(self.browser)
        landing_page.click_to_dive_in()
        self.agencies = landing_page.get_agencies_data()
        scraped_count = len(self.agencies)
        print(f"{scraped_count} items scraped from url " + self.url)
        self.write_agencies_to_excel_file(self.agencies)

    def select_agency(self, name: str):
        """Select the first scraped agency whose ``name`` equals ``name``.

        Raises ``StopIteration`` when no scraped agency matches.
        """
        self.selected_agency = next(
            agency for agency in self.agencies if agency.name == name)
        chosen = self.selected_agency
        print("'" + chosen.name +
              "' page has been selected and opened by url: " + chosen.link)

    def scrape_data_table_of_agency(self):
        """Walk every page of the selected agency's table, saving each page.

        Rows of each page are written to Excel and appended to
        ``self.individual_investments_of_agency``. An error on one page is
        printed and the walk continues with the next page.
        """
        agency_page = AgencyPageObject(self.browser,
                                       self.selected_agency.link)
        agency_page.go_to_page()
        print(
            "The scraping process of 'Individual Investments' table was started"
        )
        agency_page.wait_until_table_load()

        next_excel_row = 2
        while True:
            # Presumably refreshes ``current_page_number`` -- verify in
            # AgencyPageObject.
            agency_page.get_current_page_number()
            page_rows = []
            print(f"Page #{agency_page.current_page_number}")
            try:
                page_rows = agency_page.get_data_from_tr_elements()
                print("Scraped")
                next_excel_row = self.write_indv_invst_page_data_to_excel_file(
                    page_rows, next_excel_row)
            except Exception as ex:
                print(str(ex))
            self.individual_investments_of_agency += page_rows

            if not agency_page.check_is_next_btn_active():
                break
            agency_page.click_on_next_btn()

        total_rows = len(self.individual_investments_of_agency)
        print(f"Items of scraping process: {total_rows}")
        print("The scraping process was finished")

    def download_pdf_files(self):
        """Download the PDF of every scraped row that has a UII link.

        After each download the PDF's 'Section A' is compared with the row's
        title and UII. Failures are printed and do not stop the loop.
        """
        print("The pdf downloading process was started")
        for investment in self.individual_investments_of_agency:
            if investment.uii_link == '':
                continue
            try:
                pdf_page = UIIPageObject(
                    browser=self.browser,
                    link=investment.uii_link,
                    path_to_pdfs_dir=self.directories['output'],
                    uii=investment.uii)
                pdf_page.download_file()
                print("Pdf from '" + investment.uii_link +
                      "' was downloaded")
                expected_values = [investment.investment_title,
                                   investment.uii]
                self.extract_section_from_pdf(pdf_page.path_to_pdf_file,
                                              expected_values)
            except Exception as ex:
                print(str(ex))
        print("The pdf downloading process was finished")

    def write_agencies_to_excel_file(self, agencies: []):
        """Write ``agencies`` to the agencies sheet; always save and close."""
        try:
            self.excel.open_file()
            self.excel.set_active_sheet(config.AGENCIES_SHEET)
            self.excel.write_agencies_to_file(agencies)
        except Exception as ex:
            print(f"Unable to write data in the excel file.{ex}")
        finally:
            self.excel.save_and_close_file()

    def write_indv_invst_page_data_to_excel_file(self,
                                                 indv_invst_page_data: [],
                                                 start_row: int = 2):
        """Write one table page to the investments sheet starting at ``start_row``.

        Returns the value reported by the Excel helper (used by the caller as
        the start row of the next page), or ``start_row`` unchanged when
        writing fails. The workbook is saved and closed in both cases.
        """
        try:
            self.excel.open_file()
            self.excel.set_active_sheet(config.INDIVIDUAL_INVESTMENTS_SHEET)
            return self.excel.write_table_page_data_to_file(
                indv_invst_page_data, start_row)
        except Exception as ex:
            print(f"Unable to write data in the excel file.{ex}")
            return start_row
        finally:
            self.excel.save_and_close_file()

    def extract_section_from_pdf(self, pdf_file_path: str,
                                 values_to_compare: []):
        """Print whether the PDF's 'Section A' mentions the given title and UII.

        ``values_to_compare[0]`` is the investment title and
        ``values_to_compare[1]`` the UII; matching is case-insensitive.
        Any error is printed instead of raised.
        """
        try:
            # Entry stored under key 1 of the extraction result -- presumably
            # the text of the first page; verify against PdfUtility.
            first_page_text = PdfUtility.extract_data(pdf_file_path).get(1)
            after_section_a = first_page_text.partition("Section A")[2]
            section_a = after_section_a.partition("Section B")[0].lower()
            print("'Section A' was extracted from pdf ")

            title_found = values_to_compare[0].lower() in section_a
            uii_found = values_to_compare[1].lower() in section_a
            print(
                "'Investment Title' == 'Name of this Investment' result => "
                f"{title_found}")
            print("'UII' == 'Unique Investment Identifier (UII)' result => "
                  f"{uii_found}")
        except Exception as ex:
            print(f"Error pdf process. Reason: {ex}")
def request_response(
    self, formspec: str = None, window_width: int = 600, window_height: int = 1000
) -> dict:
    """Start server and show form. Waits for user response.

    :param formspec: form json specification file, defaults to None
    :param window_width: window width in pixels, defaults to 600
    :param window_height: window height in pixels, defaults to 1000
    :return: form response; an empty dict when the server's 200 answer
        is not valid JSON

    Example:

    .. code-block:: robotframework

        Create Form    ${CURDIR}/${/}myform.json
        &{response}    Request Response

    """
    self._start_attended_server()
    # Everything after the server start is wrapped so the server is stopped
    # even if posting the form or driving the browser fails.
    try:
        if self.custom_form is None:
            self.create_form("Requesting response")

        if formspec:
            # Read the file eagerly so the handle is closed right away.
            with open(formspec, "rb") as spec_file:
                formdata = spec_file.read()
        else:
            formdata = json.dumps(self.custom_form)

        requests.post(
            f"{self.server_address}/formspec",
            data=formdata,
            headers={"Accept": "application/json", "Content-Type": "application/json"},
        )

        response_json = {}
        # Created outside the inner ``try`` so a constructor failure is not
        # masked by an unbound ``br`` in the cleanup clause.
        br = Selenium()
        try:
            br.open_available_browser(f"{self.server_address}/form.html")
            br.set_window_size(window_width, window_height)

            # NOTE(review): the If-None-Match value is a fixed placeholder, not
            # a real ETag from the server -- confirm the server expects this.
            poll_headers = {"Prefer": "wait=120", "If-None-Match": "2434432243"}
            while True:
                response = requests.get(
                    f"{self.server_address}/requestresponse", headers=poll_headers
                )
                if response.status_code == 200:
                    try:
                        response_json = response.json()
                    except JSONDecodeError:
                        # Undecodable body: give up and return the empty dict.
                        pass
                    break
                if response.status_code != 304:
                    # back off if the server is throwing errors
                    time.sleep(10)
                    continue
                time.sleep(1)
        finally:
            br.close_browser()
        return response_json
    finally:
        self._stop_attended_server()