def __init__(self, name):
    """
    Start the browser driver and prepare the export locations.

    :param name: sub-directory of "outputs" that receives the exported files
    """
    print("\n")
    print("\rInitializing the Firefox driver...", end="")

    browser = SeleniumDriver().get_driver()
    browser.implicitly_wait(40)
    self.driver = browser

    # Both export files live below outputs/<name>/.
    output_root = os.path.join("outputs", name)
    self.export_path_json = os.path.join(output_root, "json", "json_output.json")
    self.export_path_csv = os.path.join(output_root, "csv", "csv_output.csv")
    self.make_dirs(name)

    # Opens one additional blank window next to the initial one.
    browser.execute_script("window.open('');")
def has_error(self, inputBox):
    """
    Tell whether an input box is currently flagged with 'has-error'.

    :param inputBox: the section where users send input strings
                     (looked up with the XPATH locator type)
    :return: True if 'has-error' is in the inputBox class, False otherwise
    """
    page = SeleniumDriver(self.driver)
    # The result is discarded; presumably this lookup makes sure the element
    # is present before its attribute is read -- confirm against
    # SeleniumDriver.get_element.
    page.get_element(inputBox, XPATH)
    return 'has-error' in page.get_element_attribute(inputBox, XPATH, ClASS)
def is_on(self, element):
    """
    Tell whether an element's class marks it as switched on.

    :param element: the current element (XPATH)
    :return: True if 'state-on' or 'state1' is in the element's class,
             False otherwise
    """
    page = SeleniumDriver(self.driver)
    # The result is discarded; presumably this lookup makes sure the element
    # is present before its attribute is read -- confirm against
    # SeleniumDriver.get_element.
    page.get_element(element, XPATH)
    css_classes = page.get_element_attribute(element, XPATH, ClASS)
    return any(marker in css_classes for marker in ('state-on', 'state1'))
def get_browser(context):
    """
    Generator fixture: create a Chrome browser, expose it on the context and
    quit it when the consumer is done.

    :param context: object that receives the browser as ``context.browser``
    :return: generator that yields the browser exactly once
    """
    # Setup browser
    context.browser = SeleniumDriver().get_chrome_browser()
    try:
        yield context.browser
    finally:
        # Teardown -- inside ``finally`` so the browser is also quit when the
        # generator is closed early or an exception is thrown in at the yield.
        context.browser.quit()
class Scrapper:
    """
    Browser-driven page scraper that exports collected rows as JSON or CSV.

    Export files are written to ``outputs/<name>/json/json_output.json`` and
    ``outputs/<name>/csv/csv_output.csv``.
    """

    def __init__(self, name):
        """
        Start the browser driver and prepare the export locations.

        :param name: sub-directory of "outputs" that receives the exported files
        """
        print("\n")
        print("\rInitializing the Firefox driver...", end="")
        self.driver = SeleniumDriver().get_driver()
        # get_page_soup has no explicit wait of its own; its element lookups
        # rely on this implicit wait.
        self.driver.implicitly_wait(40)
        self.export_path_json = os.path.join("outputs", name, "json", "json_output.json")
        self.export_path_csv = os.path.join("outputs", name, "csv", "csv_output.csv")
        self.make_dirs(name)
        # Open one additional blank window so get_page_soup can address more
        # than one handle through its ``tab`` index.
        self.driver.execute_script("window.open('');")

    def __del__(self):
        """Shut the browser down when the scraper is garbage-collected."""
        # __init__ may have failed before ``self.driver`` was assigned.
        driver = getattr(self, "driver", None)
        if driver is None:
            return
        # quit() closes every window and ends the session in one call; the
        # previous close-one-window-at-a-time loop kept querying the session
        # after its last window was gone.
        driver.quit()

    def get_existing_driver(self):
        """
        :return: the driver instance owned by this scraper
        """
        return self.driver

    @staticmethod
    def make_dirs(name):
        """
        Create ``outputs/<name>/json`` and ``outputs/<name>/csv`` if missing.

        :param name: sub-directory of "outputs" to create the folders in
        """
        for leaf in ("json", "csv"):
            os.makedirs(os.path.join("outputs", name, leaf), exist_ok=True)

    def get_page_soup(self, url, wait_css_selector=None, tab=0, wait_class_name=None):
        """
        Load ``url`` in the chosen window and return its parsed page source.

        :param url: address to open
        :param wait_css_selector: CSS selector of an element to look up after
                                  loading; takes precedence over wait_class_name
        :param tab: index into the driver's window handles
        :param wait_class_name: whitespace-separated class names of an element
                                to look up after loading
        :return: soup built from the driver's page source
        """
        print("\rOpening and Extracting the url:", url, end="")
        self.driver.switch_to.window(self.driver.window_handles[tab])
        self.driver.get(url)
        # NOTE(review): the find_element_by_* helpers were removed in
        # Selenium 4.3; switch to find_element(By.<...>, value) if the project
        # upgrades -- confirm the pinned Selenium version.
        if wait_css_selector is not None:
            self.driver.find_element_by_css_selector(wait_css_selector)
        elif wait_class_name is not None:
            # "a b" -> "a.b": several class names are joined into one compound
            # class expression before the lookup.
            self.driver.find_element_by_class_name(".".join(wait_class_name.split()))
        else:
            # Nothing to look up: fall back to a fixed pause.
            time.sleep(5)
        return self.get_soup_from_page_source(self.driver.page_source)

    def wait_until_element_loads(self):
        """Placeholder; currently does nothing."""
        pass

    @staticmethod
    def get_soup_from_page_source(page_source):
        """
        :param page_source: HTML text to parse
        :return: BeautifulSoup tree parsed with the "lxml" parser
        """
        return BeautifulSoup(page_source, "lxml")

    @staticmethod
    def find_all_tag_matches_by_attribute_from_soup(soup, tag="div", attribute="class", attribute_value=""):
        """
        Find every ``tag`` element in ``soup``, optionally filtered by attribute.

        :param soup: parsed tree to search
        :param tag: tag name to match
        :param attribute: attribute name to filter on, or None for no filter
        :param attribute_value: value the attribute has to match
        :return: the matches returned by ``soup.find_all``
        """
        if attribute is None:
            return soup.find_all(tag)
        return soup.find_all(tag, {attribute: attribute_value})

    def export_to_csv(self, data=None):
        """
        Write ``data`` to the CSV export path, replacing any previous file.

        The header row is taken from the keys of the first row; every row has
        to provide all of those keys. Nothing is written for empty data.

        :param data: sequence of dict rows
        :raises KeyError: if a row lacks one of the header fields
        """
        if not data:
            return
        fields = list(data[0].keys())
        # newline='' lets the csv module control line endings itself.
        with open(self.export_path_csv, "w", encoding='utf-8', newline='') as file:
            csv_writer = csv.writer(file)
            csv_writer.writerow(fields)
            csv_writer.writerows([row[field] for field in fields] for row in data)

    def export_to_json(self, data=None):
        """
        Write ``data`` to the JSON export path, replacing any previous file.

        Nothing is written when ``data`` is None.

        :param data: JSON-serialisable object
        :raises TypeError: if ``data`` cannot be serialised
        """
        if data is None:
            return
        # Serialise before opening so a failure cannot truncate an existing
        # export file.
        payload = json.dumps(data, indent=4, ensure_ascii=False)
        # ensure_ascii=False emits raw non-ASCII characters, so the file needs
        # an explicit encoding rather than the platform default.
        with open(self.export_path_json, "w", encoding="utf-8") as file:
            file.write(payload)
def __init__(self):
    """Create the utilities helper and the registry of runnable scripts."""
    browser = SeleniumDriver().browser
    self.utils = Utils(browser)

    # Add scripts here
    self.options = {
        'example_script': ExampleScript(),
    }
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

from selenium_driver import SeleniumDriver
from keys import keys
from create_emails import CreateEmail
from spotify_accounts import SpotifyAccount

# Chrome command-line switches used for every session.
options = webdriver.ChromeOptions()
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--no-sandbox')
# options.add_argument('--headless')

# NOTE: the browser is created at module level, i.e. also when this module is
# merely imported, not only when it is run as a script.
_chromedriver_path = ChromeDriverManager().install()
driver = SeleniumDriver(
    webdriver.Chrome(_chromedriver_path, options=options))

if __name__ == '__main__':
    url = keys['google_url']
    # CreateEmail(url, driver)
    SpotifyAccount(keys['spotify_url'], driver)