class SeleniumRuntime:
    """
    Thin convenience wrapper around one Selenium WebDriver instance.

    The concrete driver is selected from the module-level ``TARGET_BROWSER``
    setting ('chrome', 'firefox' or 'edge').

    Attributes:
        logger: logger obtained from ``logging.getLogger(LOGGER_INSTANCE)``
        browser: the WebDriver created for ``TARGET_BROWSER``; ``None`` when
            the configured value is not one of the supported names
    """

    def __init__(self):
        self.logger = logging.getLogger(LOGGER_INSTANCE)
        # Define the attribute up front so a misconfigured TARGET_BROWSER is
        # reported here instead of surfacing later as a missing attribute.
        self.browser = None
        if TARGET_BROWSER == 'chrome':
            self.browser = Chrome()
        elif TARGET_BROWSER == 'firefox':
            self.browser = Firefox()
        elif TARGET_BROWSER == 'edge':
            self.browser = Edge()
        else:
            self.logger.error('Unsupported TARGET_BROWSER: %r', TARGET_BROWSER)

    def go_to_page(self, url):
        """Navigate the browser to ``url``."""
        self.browser.get(url)

    def submit_form(self):
        """Submit the first ``<form>`` element found on the current page."""
        # The find_element_by_* helpers were removed in Selenium 4.3; the
        # generic find_element(by, value) form works on old and new versions.
        form = self.browser.find_element(By.TAG_NAME, 'form')
        form.submit()

    def fill_form(self, table):
        """
        Type values into form fields.

        Args:
            table: mapping of element id -> text to type; every field is
                cleared before the text is sent
        """
        for field, value in table.items():
            element = self.browser.find_element(By.ID, field)
            element.clear()
            element.send_keys(value)

    def fill_selects(self, table):
        """
        Click one option per entry of ``table``.

        Args:
            table: mapping whose values are XPath expressions of the elements
                to click; the keys are not used by this method
        """
        for option_xpath in table.values():
            self.wait_for_element(option_xpath, By.XPATH).click()

    def click(self, value, by=By.ID):
        """Locate an element with the ``by``/``value`` pair and click it."""
        self.browser.find_element(by, value).click()

    def get_element(self, value, by=By.ID):
        """Return the first element matching the ``by``/``value`` locator."""
        return self.browser.find_element(by, value)

    def get_elements(self, value, by=By.ID):
        """Return all elements matching the ``by``/``value`` locator."""
        return self.browser.find_elements(by, value)

    def assert_presence(self, value, by=By.ID):
        """Return True when the locator matches an element, False otherwise."""
        try:
            self.browser.find_element(by, value)
        except NoSuchElementException:
            return False
        return True

    def back(self):
        """Go one step back in the browser history."""
        self.browser.back()

    def forward(self):
        """Go one step forward in the browser history."""
        self.browser.forward()

    def refresh(self):
        """Reload the current page."""
        self.browser.refresh()

    def current_title(self):
        """Return the title of the current page."""
        return self.browser.title

    def current_url(self):
        """Return the URL of the current page."""
        return self.browser.current_url

    def wait_for_element(self, value, by=By.ID, timeout=30):
        """
        Wait until the located element is present and return it.

        Args:
            value: locator value
            by: locator strategy
            timeout: value handed to ``WebDriverWait`` as its timeout
        """
        condition = expected_conditions.presence_of_element_located((by, value))
        return WebDriverWait(self.browser, timeout).until(condition)

    def wait_for_redirect(self, target_url, timeout=30):
        """
        Wait until the browser URL equals ``target_url``.

        Args:
            target_url: URL the browser is expected to reach
            timeout: value handed to ``WebDriverWait`` as its timeout
        """
        condition = expected_conditions.url_to_be(target_url)
        return WebDriverWait(self.browser, timeout).until(condition)

    @staticmethod
    def assert_class(element, class_name):
        """
        Return True when ``class_name`` occurs in the element's class attribute.

        This is a substring match on the raw attribute text, so 'active'
        also matches 'inactive'. A missing class attribute yields False.
        """
        class_attr = element.get_attribute('class')
        if class_attr is None:
            return False
        return class_name in class_attr

    @staticmethod
    def assert_attribute(element, attribute_name, attribute_value):
        """
        Return True when ``attribute_value`` occurs in the named attribute.

        This is a substring match. A missing attribute yields False.
        """
        attr_value = element.get_attribute(attribute_name)
        if attr_value is None:
            return False
        return attribute_value in attr_value
def _scrape_nosab_sectors(driver):
    """
    Visit every sector link of the NOSAB company listing and read its companies.

    :param driver: WebDriver used for all navigation
    :return: list of ``{'Sector': str, 'comps': [{'Name': str, 'Data': str}]}``
    """
    company_items_xpath = '/html/body/div[7]/div/div[2]/div[3]/ul/li'

    driver.get('https://www.nosab.org.tr/firmalar/tr')
    # Read text and href before navigating; the elements belong to this page.
    sector_links = [
        (anchor.text, anchor.get_attribute('href'))
        for anchor in driver.find_elements(
            By.XPATH, '//*[@id="accordion-2"]/li/a')
    ]

    sectors = []
    for sector_name, sector_url in sector_links:
        driver.get(sector_url)
        companies_sector = {'Sector': sector_name, 'comps': []}
        componies_count = len(
            driver.find_elements(By.XPATH, f'{company_items_xpath}/a'))
        # XPath positions are 1-based. Each entry is looked up again by
        # position because the loop navigates away and back every iteration.
        for indx in range(1, componies_count + 1):
            driver.find_element(
                By.XPATH, f'{company_items_xpath}[{indx}]/a').click()
            companies_sector['comps'].append({
                'Name': driver.find_element(
                    By.XPATH, '/html/body/div[7]/div/div[2]/div[1]/div').text,
                'Data': str(driver.find_element(
                    By.XPATH, '/html/body/div[7]/div/div[2]/div[4]').text),
            })
            driver.back()
        sectors.append(companies_sector)
    return sectors


def _write_nosab_workbook(componies):
    """
    Write the scraped sectors to the 'nosab' sheet of ``excel_file_name``.

    :param componies: list produced by ``_scrape_nosab_sectors``
    :return:
    """
    workbook = Workbook(excel_file_name)
    worksheet = workbook.add_worksheet('nosab')

    hformat = workbook.add_format()
    hformat.set_bold()
    hformat.set_align('center')
    hformat.set_align('vcenter')
    fwarp = workbook.add_format()
    fwarp.set_text_wrap()
    fname_centralize = workbook.add_format()
    fname_centralize.set_align('center')

    row = 0
    worksheet.write(row, 0, 'Firma Adi', hformat)
    worksheet.set_column('A:A', 100)
    worksheet.write(row, 1, 'Bilgileri', hformat)
    worksheet.set_column('B:B', 120)
    row += 1

    for sector in componies:
        # One header-styled row per sector, followed by its companies.
        worksheet.write(row, 0, sector['Sector'], hformat)
        row += 1
        for comp in sector['comps']:
            worksheet.write(row, 0, comp['Name'], fname_centralize)
            worksheet.write(row, 1, comp['Data'], fwarp)
            row += 1

    # Remove a previous export before the workbook is closed.
    if os.path.exists(excel_file_name):
        os.remove(excel_file_name)
    time.sleep(_bases.timeout)
    workbook.close()


def getcomponies():
    """
    Get Companies from web and write to excel file

    The browser session is ended with ``quit()`` in a ``finally`` block so a
    failure while scraping or writing does not leave the Edge driver running.

    :return:
    """
    _bases.kill_web_driver_edge()
    driver = Edge()
    try:
        componies = _scrape_nosab_sectors(driver)
        _write_nosab_workbook(componies)
    finally:
        driver.quit()
def _scrape_kosab_companies(driver):
    """
    Walk every result page of the KOSAB company table and read each company.

    :param driver: WebDriver used for all navigation
    :return: list of dicts with the keys 'Name', 'Sector', 'Mail' and 'Tel'
             (a key is only present when the matching row was found)
    """
    rows_xpath = '/html/body/div[2]/div/div/span[2]/table/tbody/tr'
    # Label prefixes of the detail rows; the trailing space is part of the label.
    labelled_fields = (
        ('Faaliyet Alanı ', 'Sector'),
        ('Epostalar ', 'Mail'),
        ('Telefonlar ', 'Tel'),
    )

    driver.get('http://www.kosab.org.tr/FIRMALAR/')
    # The pagination anchors are read from the last row of the table.
    pagination_count = len(driver.find_elements(By.XPATH, rows_xpath))
    pagination_links = [
        pagelink.get_attribute('href')
        for pagelink in driver.find_elements(
            By.XPATH, f'{rows_xpath}[{pagination_count}]/td/a')
    ]

    componies = []
    for page in pagination_links:
        driver.get(page)
        row_total = len(driver.find_elements(By.XPATH, rows_xpath))
        # First and last rows are skipped (presumably header and pager rows;
        # verify against the live page).
        for row_index in range(1, row_total - 1):
            # Look the rows up again: the previous iteration navigated away.
            driver.find_elements(By.XPATH, rows_xpath)[row_index].click()
            datatable = driver.find_elements(By.XPATH, rows_xpath)

            company = {}
            # The last row of the detail table is not read.
            for cell, element in enumerate(datatable[:-1]):
                datastr = str(element.text)
                if cell == 0:
                    company['Name'] = datastr
                for prefix, key in labelled_fields:
                    if datastr.startswith(prefix):
                        # Keep everything after the label, even when the
                        # value happens to contain the label text again.
                        company[key] = datastr[len(prefix):]
            componies.append(company)
            driver.back()
    return componies


def _write_kosab_workbook(componies):
    """
    Write the scraped companies to the 'Kosab' sheet of ``excel_file_name``.

    :param componies: list produced by ``_scrape_kosab_companies``
    :return:
    """
    workbook = Workbook(excel_file_name)
    worksheet = workbook.add_worksheet('Kosab')

    hformat = workbook.add_format()
    hformat.set_bold()
    hformat.set_align('center')
    hformat.set_align('vcenter')
    hformat.set_font_color('white')
    hformat.set_bg_color('blue')

    worksheet.write(0, 0, 'Firma Adi', hformat)
    worksheet.write(0, 1, 'Mail Adresi', hformat)
    worksheet.write(0, 2, 'Sektor', hformat)
    worksheet.write(0, 3, 'Telefon', hformat)
    worksheet.set_column('A:A', 70)
    worksheet.set_column('B:B', 40)
    worksheet.set_column('C:C', 40)
    worksheet.set_column('D:D', 30)

    columns = (('Name', 0), ('Mail', 1), ('Sector', 2), ('Tel', 3))
    # Data rows start right below the header row.
    for row, cmpy in enumerate(componies, start=1):
        for key, column in columns:
            if key in cmpy:
                worksheet.write(row, column, str(cmpy[key]))

    # Remove a previous export before the workbook is closed.
    if os.path.exists(excel_file_name):
        os.remove(excel_file_name)
    time.sleep(2)
    workbook.close()


def get_componies():
    """
    Get Companies from web and write to excel file

    The browser session is ended with ``quit()`` in a ``finally`` block so the
    Edge driver is released on success and on failure alike.

    :return:
    """
    _bases.kill_web_driver_edge()
    driver = Edge()
    try:
        componies = _scrape_kosab_companies(driver)
        _write_kosab_workbook(componies)
    finally:
        driver.quit()
def _scrape_bursaderi_companies(driver):
    """
    Open every member entry of the Bursa Deri OSB listing and read its details.

    :param driver: WebDriver used for all navigation
    :return: list of dicts with 'Name' and, when found, 'Mail' and 'Tel'
    """
    entries_xpath = '//*[@id="main"]/div/div/div[2]/div'
    labelled_fields = (('Mail:', 'Mail'), ('Telefon:', 'Tel'))

    driver.get('https://bursaderiosb.com/uyeler')
    entry_count = len(driver.find_elements(By.XPATH, entries_xpath))

    componies = []
    # An empty listing yields an empty range instead of range(1, None).
    # NOTE(review): the upper bound excludes the last matched div, exactly as
    # before - confirm whether that div is a non-company element or whether
    # the last company is being skipped.
    for i in range(1, entry_count):
        driver.find_element(
            By.XPATH, f'{entries_xpath}[{i}]/div[2]/a').click()

        company = {
            'Name': driver.find_element(
                By.XPATH, '//*[@id="main"]/div/div/div[1]').text,
        }
        detail_lines = str(
            driver.find_element(By.XPATH, entries_xpath).text).split('\n')
        for compdata in detail_lines:
            for label, key in labelled_fields:
                if compdata.startswith(label):
                    # Keep everything after the label, even when the value
                    # happens to contain the label text again.
                    company[key] = compdata[len(label):]
        componies.append(company)
        driver.back()
    return componies


def _write_bursaderi_workbook(componies):
    """
    Write the scraped companies to the 'Bursa Deri OSB' sheet.

    :param componies: list produced by ``_scrape_bursaderi_companies``
    :return:
    """
    workbook = Workbook(excel_file_name)
    worksheet = workbook.add_worksheet('Bursa Deri OSB')

    hformat = workbook.add_format()
    hformat.set_bold()
    hformat.set_align('center')
    hformat.set_align('vcenter')

    worksheet.write(0, 0, 'Firma Adi', hformat)
    worksheet.write(0, 1, 'Mail Adresi', hformat)
    worksheet.write(0, 2, 'Telefon', hformat)
    worksheet.set_column('A:A', 100)
    worksheet.set_column('B:B', 80)
    worksheet.set_column('C:C', 50)

    columns = (('Name', 0), ('Mail', 1), ('Tel', 2))
    # Data rows start right below the header row.
    for row, cmpy in enumerate(componies, start=1):
        for key, column in columns:
            if key in cmpy:
                worksheet.write(row, column, str(cmpy[key]))

    # Remove a previous export before the workbook is closed.
    if os.path.exists(excel_file_name):
        os.remove(excel_file_name)
    workbook.close()


def get_componies():
    """
    Get Companies from web and write to excel file

    The browser session is ended with ``quit()`` in a ``finally`` block so the
    Edge driver is released even when scraping or writing fails.

    :return:
    """
    _bases.kill_web_driver_edge()
    driver = Edge()
    try:
        componies = _scrape_bursaderi_companies(driver)
        _write_bursaderi_workbook(componies)
        time.sleep(3)
    finally:
        driver.quit()