def main(position, location):
    """Scrape every result page of a job search and save the records.

    The search URL comes from ``get_url(position, location)``. Each result
    page is opened in a Chromium-based Edge session, its job cards are handed
    to ``get_page_records`` together with the running ``scraped_jobs`` list
    and ``scraped_urls`` set, and the accumulated jobs are finally passed to
    ``save_data_to_file``.

    Args:
        position: Forwarded unchanged to ``get_url``.
        location: Forwarded unchanged to ``get_url``.

    Raises:
        Any WebDriver error other than the two handled below propagates to
        the caller; the browser session is closed first.
    """
    scraped_jobs = []
    scraped_urls = set()

    url = get_url(position, location)

    # setup web driver
    options = EdgeOptions()
    options.use_chromium = True
    driver = Edge(options=options)

    # Everything that talks to the browser sits inside try/finally so the
    # Edge process is shut down even when a page load or lookup fails.
    try:
        driver.implicitly_wait(5)
        driver.get(url)

        # extract the job data
        while True:
            cards = driver.find_elements_by_class_name('jobsearch-SerpJobCard')
            get_page_records(cards, scraped_jobs, scraped_urls)
            try:
                driver.find_element_by_xpath('//a[@aria-label="Next"]').click()
            except NoSuchElementException:
                # No "Next" link: this was the last page.
                break
            except ElementNotInteractableException:
                # The job notification popup covers the "Next" link: close
                # it, re-process the current cards, then let the loop retry.
                driver.find_element_by_id('popover-x').click()
                get_page_records(cards, scraped_jobs, scraped_urls)
    finally:
        # shutdown driver
        driver.quit()

    # save file
    save_data_to_file(scraped_jobs)
def open_browser(env, browser='chrome', incognito=True):
    """Start a browser session and open the portal URL configured for ``env``.

    Args:
        env: Environment key. ``"msit"``, ``"srol1"``, ``"srol2"``, ``"ppe"``
            and ``"refe"`` are looked up under ``portal`` in the YAML file at
            ``web_config_path`` and the resulting URL is opened. An empty
            string means "no session": nothing is started and ``None`` is
            returned. Any other value starts the browser without navigating.
        browser: One of ``"chrome"``, ``"msedge"``, ``"firefox"``, ``"ie"``
            or ``"safari"``.
        incognito: Request a private window. Only applied for chrome, msedge
            and firefox.

    Returns:
        The started driver (maximized, with the implicit wait taken from the
        ``implicitly_wait`` config entry), or ``None`` when ``env`` is empty.

    Raises:
        ValueError: If ``browser`` is not one of the supported names.
    """
    if env == '':
        # Previously the driver was set to None for an empty env and then
        # dereferenced, which always raised AttributeError and orphaned the
        # browser that had just been launched. Return None without starting
        # anything instead.
        return None

    # Load the web configuration first so a broken config file cannot leave
    # a browser process behind.
    with open(web_config_path, 'r', encoding='utf-8') as file:
        data = yaml.load(file, Loader=yaml.FullLoader)

    if browser == "chrome":
        chrome_options = webdriver.ChromeOptions()
        if incognito:
            chrome_options.add_argument('--incognito')
        # `options=` replaces the deprecated `chrome_options=` keyword and
        # matches how the other branches pass their options.
        driver = webdriver.Chrome(executable_path=chrome_driver_path,
                                  options=chrome_options)
    elif browser == "msedge":
        edge_options = EdgeOptions()
        edge_options.use_chromium = True
        if incognito:
            edge_options.add_argument('-inprivate')
        driver = Edge(executable_path=msedge_driver_path, options=edge_options)
    elif browser == "firefox":
        firefox_options = webdriver.FirefoxOptions()
        if incognito:
            # Firefox has no `--incognito` switch (that is Chrome's flag);
            # its private-browsing switch is `-private`.
            firefox_options.add_argument('-private')
        driver = webdriver.Firefox(executable_path=firefox_driver_path,
                                   options=firefox_options)
    elif browser == "ie":
        driver = webdriver.Ie(executable_path=ie_driver_path)
    elif browser == "safari":
        driver = webdriver.Safari()
    else:
        raise ValueError(f"Unsupported browser: {browser!r}")

    # The following is an example configuration based on
    # config/web_config.yaml: every supported environment name is also its
    # key under `portal`.
    portal_envs = ("msit", "srol1", "srol2", "ppe", "refe")
    try:
        if env in portal_envs:
            url = data["portal"][env]
            Logger.info("Open Url: %s", url)
            driver.get(url)

        driver.maximize_window()
        driver.implicitly_wait(data['implicitly_wait'])
    except Exception:
        # Do not leak the browser when navigation or setup fails.
        driver.quit()
        raise
    return driver
def browserOpen(self, driver):
    '''
    Start the browser selected in Config/config.ini and open the test URL.

    The browser name is read from option ``browserName`` of section
    ``browserType`` and the URL from option ``URL`` of section ``testUrl``.
    After the page is requested the window is maximized and a 10 second
    implicit wait is set.

    :param driver: used unchanged when the configured browser name is not one
        of Chrome, Firefox, IE, Safari or Edge; replaced otherwise
    :return: the driver the page was opened with
    '''
    ini_path = os.path.dirname(os.path.abspath('')) + "/Config/config.ini"
    settings = ConfigParser()
    settings.read(ini_path)

    browser = settings.get("browserType", "browserName")
    log.info(f"You had select {browser} browser.")
    url = settings.get("testUrl", "URL")
    log.info(f"The test url is: {url}")

    print("--------Open browser--------")

    # Browser name -> (factory, log line). The factories are lambdas so a
    # driver path attribute is only read for the browser actually selected.
    launchers = {
        "Chrome": (
            lambda: webdriver.Chrome(self.chrome_driver_path),
            "Start chrome browser.",
        ),
        "Firefox": (
            lambda: webdriver.Firefox(executable_path=self.firefox_driver_path),
            "Start Firefox browser.",
        ),
        "IE": (
            lambda: webdriver.Ie(executable_path=self.ie_driver_path),
            "Start IE browser.",
        ),
        "Safari": (
            lambda: webdriver.Safari(
                executable_path="/use/local/bin/safaridriver"),
            "Start safari browser.",
        ),
        "Edge": (
            lambda: Edge(executable_path=self.edge_driver_path),
            "Start Edge browser.",
        ),
    }

    selected = launchers.get(browser)
    if selected is not None:
        start, started_message = selected
        driver = start()
        log.info(started_message)

    driver.get(url)
    log.info(f"Open url: {url}")
    driver.maximize_window()
    log.info("Maximize the current window.")
    driver.implicitly_wait(10)
    log.info("Set implicitly wait 10 seconds.")
    return driver
class HeaderText(unittest.TestCase):
    """Checks the page heading of the app served at http://localhost:4200."""

    def setUp(self):
        """Start Edge Dev (Chromium) with the bundled driver and load the app."""
        # msedgedriver.exe is looked up in edgedriver_win64 next to this file.
        here = os.path.dirname(os.path.realpath(__file__))
        driver_exe = f"{here}\\edgedriver_win64\\msedgedriver.exe"

        edge_opts = EdgeOptions()
        edge_opts.use_chromium = True
        edge_opts.binary_location = (
            "C:\\Program Files (x86)\\Microsoft\\Edge Dev\\Application\\msedge.exe"
        )

        browser = Edge(options=edge_opts, executable_path=driver_exe)
        self.driver = browser
        browser.implicitly_wait(30)
        browser.maximize_window()
        browser.get("http://localhost:4200")

    def test_HeaderText(self):
        """The first <h1> on the page must read "todos"."""
        heading = self.driver.find_element_by_css_selector("h1")
        headerText = heading.get_attribute("innerText")
        self.assertEqual("todos", headerText)

    def tearDown(self):
        """Close the browser session opened in setUp."""
        self.driver.quit()
class AddAToDoText(unittest.TestCase):
    """UI test case that starts Edge Dev (Chromium) through msedgedriver.

    NOTE(review): this block is damaged. The argument of ``self.driver.get``
    below looks like it was rewritten by a credential-redaction pass: it starts
    as a URL and ends as an XPath, and whatever stood between the two (most
    likely the end of ``setUp`` and the header and first steps of a test
    method) is missing. ``addedToDoText`` is therefore never assigned.
    Restore the original from version control before relying on this class.
    """

    def setUp(self):
        # Run against the Chromium-based Edge Dev build at its default
        # install location.
        options = EdgeOptions()
        options.use_chromium = True
        options.binary_location = "C:\\Program Files (x86)\\Microsoft\\Edge Dev\\Application\\msedge.exe"
        # msedgedriver.exe is expected in edgedriver_win64 next to this file.
        dir = os.path.dirname(os.path.realpath(__file__))
        edge_driver_path = dir + "\\edgedriver_win64\\msedgedriver.exe"
        self.driver = Edge(options=options, executable_path=edge_driver_path)
        self.driver.implicitly_wait(30)
        self.driver.maximize_window()
        # NOTE(review): redaction damage. This is not a valid URL, and the
        # result of get_attribute() is discarded instead of being stored in
        # addedToDoText.
        self.driver.get("http://*****:*****@class='toggle']/following-sibling::label").get_attribute("innerText")
        # NOTE(review): presumably belonged to a test method whose header was
        # lost; addedToDoText is undefined here.
        self.assertEqual("The test is adding this todo", addedToDoText)

    def tearDown(self):
        # Close the browser session started in setUp.
        self.driver.quit()
class Aplicacion():
    """Tk window that attaches every file of a folder to a TFS work item.

    The window asks for a ticket number, a folder and an execution mode.
    "Upload" validates the folder, prints the size of each file and then
    drives Edge to the work item's attachment tab to upload the files.
    """

    def __init__(self):
        """Build the window and run the Tk main loop (blocks until closed)."""
        self.ventana1 = tk.Tk()
        self.ventana1.title('TFS')
        self.ventana1.geometry('270x250')

        # Ticket number.
        self.label1 = tk.Label(self.ventana1, text="Ticket:")
        self.label1.grid(column=0, row=0,)
        self.dato1 = tk.IntVar()
        self.entry1 = tk.Entry(self.ventana1, width=30, textvariable=self.dato1)
        self.entry1.grid(column=1, row=0)
        self.entry1.insert(0, "123456")

        # Folder holding the documents to upload.
        self.label2 = tk.Label(self.ventana1, text="Carpeta:")
        self.label2.grid(column=0, row=1)
        self.dato2 = tk.StringVar()
        self.entry2 = tk.Entry(self.ventana1, width=30, textvariable=self.dato2)
        self.entry2.grid(column=1, row=1)

        # Execution mode; upload() does nothing until one is selected.
        radioGroup = LabelFrame(self.ventana1, text="Seleccionar modo de ejecucion")
        radioGroup.grid(column=0, row=4, columnspan=5, pady=2)
        self.seleccion = tk.IntVar()
        self.radio1 = tk.Radiobutton(radioGroup, text="Background",
                                     variable=self.seleccion, value=1)
        self.radio1.grid(column=0, row=2)
        self.radio2 = tk.Radiobutton(radioGroup, text="Online",
                                     variable=self.seleccion, value=2)
        self.radio2.grid(column=1, row=2)

        self.boton1 = tk.Button(self.ventana1, text="Upload", command=self.upload)
        self.boton1.grid(column=1, row=10)
        # The backslash is escaped explicitly; "\ " is an invalid escape
        # sequence that newer Python versions warn about.
        self.boton2 = tk.Button(self.ventana1, text=" /\\ ", command=self.directory)
        self.boton2.grid(column=5, row=1)

        self.ventana1.mainloop()

    def directory(self):
        """Let the user pick a folder and show it, Windows-style, in the entry.

        The chosen path replaces whatever the folder entry held. Cancelling
        the dialog leaves the entry untouched.
        """
        # Attach the dialog to the existing window; creating a fresh Tk()
        # root per click left one hidden root window behind each time.
        self.folder_selected = filedialog.askdirectory(parent=self.ventana1)
        if not self.folder_selected:
            return

        carpeta2 = self.folder_selected.replace("/", "\\")
        if not carpeta2.endswith("\\"):
            carpeta2 += "\\"

        # Replace instead of prepending, so picking a folder twice does not
        # concatenate both paths.
        self.entry2.delete(0, tk.END)
        self.entry2.insert(0, carpeta2)
        print(carpeta2)

    def validacion1(self):
        """Check that the selected folder is readable and not empty.

        Returns:
            0 when the folder can be listed and contains at least one entry,
            1 otherwise. An empty folder also shows a message in the window.
        """
        ticket = self.dato1.get()
        dire = self.dato2.get()
        print(ticket)
        print(dire)

        try:
            os.listdir(dire)
            # os.path.join keeps the pattern inside the folder even when the
            # typed path has no trailing separator.
            cantdocs = len(glob.glob(os.path.join(dire, "*")))
        except (OSError, ValueError) as e:
            # Missing or unreadable folder, or a malformed path.
            print(e)
            return 1

        if cantdocs == 0:
            self.label4 = tk.Label(self.ventana1, text="No hay docs en la carpeta")
            self.label4.grid(column=1, row=15)
            return 1
        return 0

    def accesotfs(self, ruta, dire):
        """Open the ticket's work item in Edge and attach the given files.

        Args:
            ruta: File names to attach.
            dire: Folder the names in ``ruta`` live in.
        """
        ticket1 = str(self.dato1.get())

        # NOTE(review): these options are built but never handed to Edge();
        # only service_args is. Kept because the attribute may be read
        # elsewhere.
        self.edge_option = EdgeOptions()
        self.edge_option.add_argument("hide_console")
        # Raw string: "\G" and "\m" are invalid escape sequences.
        self.driver = Edge(r"C:\Google\msedgedriver", service_args=["hide_console"])

        url = "http://10.1.27.11:8080/tfs/TFSYPF/E2E/_workitems?_a=edit&id="
        urlarmada = url + ticket1

        # Connect.
        self.driver.get(urlarmada)
        self.driver.implicitly_wait(8)

        # Go to the attachments tab.
        self.attachment = "ui-id-7"
        self.driver.find_element_by_id(self.attachment).click()

        # The implicit wait is a session-wide setting, so it is set once
        # here instead of before every lookup.
        self.driver.implicitly_wait(5)
        for nombre_archivo in ruta:
            direarchivo = os.path.join(dire, nombre_archivo)
            # "Add attachment" button.
            self.driver.find_element_by_xpath("/html/body/div[2]/div/div[2]/div/div[2]/div/div[3]/div[4]/div[2]/div/div[2]/div[2]/table/tbody/tr[5]/td/table/tbody/tr/td[2]/table/tbody/tr/td/div/div[3]/table/tbody/tr/td/div/ul/li[2]").click()
            # "Choose file" input.
            self.driver.find_element_by_xpath("/html/body/div[4]/div[2]/div/form/input[1]").send_keys(direarchivo)
            # "OK" button.
            self.driver.find_element_by_xpath("/html/body/div[4]/div[3]/div/button[1]").click()

        # "Save" button.
        self.driver.find_element_by_xpath("/html/body/div[2]/div/div[2]/div/div[2]/div/div[3]/div[4]/div[2]/div/div[2]/div[1]/ul/li[2]").click()

    def upload(self):
        """Validate the form, report each file's size and start the upload.

        Does nothing unless an execution mode is selected and the folder
        passes ``validacion1``.
        """
        if self.seleccion.get() not in (1, 2):
            return
        if self.validacion1() != 0:
            return

        print("todo ok")
        dire = self.dato2.get()

        # Report every file's size (4,194,304 bytes is the 4 MB mark).
        ruta = os.listdir(dire)
        print(ruta)
        for nombre_archivo in ruta:
            direarchivo = os.path.join(dire, nombre_archivo)
            sizefile = os.stat(direarchivo).st_size
            print(direarchivo, "--- este archivo pesa", sizefile, "bytes")

        self.accesotfs(ruta, dire)
class TwitterBot():
    """Drives an Edge session on twitter.com: log in, search, scrape tweets."""

    _SEARCH_BOX_XPATH = "//input[@data-testid='SearchBox_Search_Input']"

    # Page regions whose text must not be recorded as tweet content.
    _DIRTY_XPATHS = (
        "//div[@class='css-1dbjc4n r-1d09ksm r-18u37iz r-1wbh5a2']",
        "//div[@class = 'css-1dbjc4n r-18u37iz r-1wtj0ep r-156q2ks r-1mdbhws']",
        "//div[contains(text(),'Replying to')]",
        "//div[@role = 'blockquote']",
    )

    # Literal lines that are never tweet content.
    _DIRTY_LINES = frozenset(
        ['Quote Tweet', 'Promoted', 'Show this thread', '', '\n', ' '])

    def __init__(self):
        """Start Edge, maximize it and open twitter.com."""
        self.driver = Edge()
        self.driver.maximize_window()
        self.driver.get('https://twitter.com')
        self.driver.implicitly_wait(3)

    def goToTwitter(self):
        """Navigate back to the twitter.com start page."""
        self.driver.get('https://twitter.com')

    def login(self):
        """Log in with the module-level ``username`` and ``password``."""
        self.driver.find_element_by_xpath("//a[@href='/login']").click()
        # Sleep because, before this time has passed, there is another
        # instance of an element named like the ones below. Getting the
        # right element is crucial in order to interact with it.
        sleep(1)
        self.driver.find_element_by_xpath(
            "//input[@name='session[username_or_email]']").send_keys(username)
        self.driver.find_element_by_xpath(
            "//input[@name='session[password]']").send_keys(password)
        self.driver.find_element_by_xpath(
            "//div[@data-testid='LoginForm_Login_Button']").click()

    def _submit_search(self, query):
        """Type ``query`` into the search box and submit it."""
        # The box is looked up again before submitting, as the original
        # code did, rather than reusing the first element handle.
        self.driver.find_element_by_xpath(
            self._SEARCH_BOX_XPATH).send_keys(query)
        self.driver.find_element_by_xpath(self._SEARCH_BOX_XPATH).submit()

    def basicSearch(self, topic):
        """Search for ``topic`` as typed."""
        self._submit_search(topic)

    def advancedSearch(self, exact, any, none, hashtags, dateFrom, dateTo):
        """Build a query with Twitter search operators and submit it.

        Every argument is optional: pass ``None`` to leave an operator out.

        Args:
            exact: Phrase, emitted in double quotes.
            any: Terms, emitted in parentheses.
            none: Term to exclude, emitted with a leading ``-``.
            hashtags: Hashtag without ``#``, emitted as ``(#tag)``.
            dateFrom: Emitted as ``since:<dateFrom>``.
            dateTo: Emitted as ``until:<dateTo>``.
        """
        # Accommodates the different search types a user might want. Each
        # operator keeps its trailing space, ``until:`` precedes ``since:``.
        parts = []
        if exact is not None:
            parts.append('"' + exact + '" ')
        if any is not None:
            parts.append('(' + any + ') ')
        if none is not None:
            parts.append('-' + none + ' ')
        if hashtags is not None:
            parts.append('(#' + hashtags + ') ')
        if dateTo is not None:
            parts.append('until:' + dateTo + ' ')
        if dateFrom is not None:
            parts.append('since:' + dateFrom + ' ')
        self._submit_search(''.join(parts))

    def scrapeTweets(self, desiredNum):
        """Scroll the current page and collect up to ``desiredNum`` lines.

        Each accepted line is written, with punctuation stripped, to
        ``tweets.csv`` (one per row). Scraping stops when enough lines were
        collected or when scrolling yields exactly the lines seen in the
        previous pass.

        Args:
            desiredNum: Maximum number of lines to collect.

        Returns:
            The accepted lines, unmodified, concatenated without separator.
        """
        collected = []
        oldDataLines = []
        dataLines = ['init']
        numDataLines = 0

        # The context manager closes the file even when a WebDriver call
        # raises part-way through.
        with open('tweets.csv', 'w') as tweetsFile:
            while numDataLines < desiredNum and oldDataLines != dataLines:
                oldDataLines = dataLines
                sleep(1)

                # Text of page regions that should not be picked up. The data
                # is stored as multi-line strings, so split into lines.
                dirtyLines = set()
                dirtyData = []
                for xpath in self._DIRTY_XPATHS:
                    dirtyData.extend(
                        self.driver.find_elements_by_xpath(xpath))
                for dirt in dirtyData:
                    dirtyLines.update(dirt.text.split('\n'))

                # Tweet text; still includes dirty lines, weeded out below.
                data = self.driver.find_elements_by_xpath(
                    "//div[@data-testid='tweet']")
                dataLines = []
                for datapoint in data:
                    dataLines.extend(datapoint.text.split('\n'))

                # Lines of the previous pass are skipped to avoid redundancy.
                previousLines = set(oldDataLines)
                for line in dataLines:
                    if (line in dirtyLines or line in previousLines
                            or line in self._DIRTY_LINES):
                        continue
                    if numDataLines >= desiredNum:
                        break
                    try:
                        noPunctuationLine = re.sub(r'[^\w\s]', '', line)
                        tweetsFile.write(noPunctuationLine)
                        tweetsFile.write("\n")
                    except UnicodeEncodeError:
                        # The file's encoding cannot represent this line.
                        print('This data point not encodable.')
                    else:
                        collected.append(line)
                        numDataLines += 1

                # Scroll to the bottom so more tweets get loaded.
                height = self.driver.execute_script(
                    "return document.documentElement.scrollHeight")
                self.driver.execute_script(
                    "window.scrollTo(0, " + str(height) + ");")

        return ''.join(collected)
else: if ticket == 99: print("no hay numero de 7 digitos") else: print("el ticket debe ser", ticket) print(lista) ticket = lista[0] print(ticket) edge_option = EdgeOptions() edge_option.add_argument("hide_console") driver = Edge("C:\Google\msedgedriver", service_args=["hide_console"]) url = "http://10.1.27.11:8080/tfs/TFSYPF/E2E/_workitems?_a=edit&id=" urlarmada = url + ticket driver.get(urlarmada) driver.implicitly_wait(8) attachment = "ui-id-7" driver.find_element_by_id(attachment).click() direarchivo = dir_inicial + nombre_archivo driver.implicitly_wait(5) driver.find_element_by_xpath( "/html/body/div[4]/div[2]/div/form/input[1]").send_keys( direarchivo) driver.implicitly_wait(5) driver.find_element_by_xpath( "/html/body/div[4]/div[3]/div/button[1]").click() driver.implicitly_wait(5) driver.implicitly_wait(5)
# wd = switch_window('神奇女侠', wd)
# # print(wd.title)


if __name__ == '__main__':
    # Start Chromium-based Edge on the existing user profile and open the
    # course page. The session is intentionally left open (no quit call).
    profile_dir = 'C:/Users/10782/AppData/Local/Microsoft/Edge/User Data'
    course_url = 'https://hfut.yuketang.cn/pro/lms/83i7ZSNAWqB/4041808/studycontent'
    driver_url = 'C:/Users/10782/AppData/Local/Programs/Python/Python38/MicrosoftWebDriver.exe'

    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument('user-data-dir=' + profile_dir)

    wd = Edge(executable_path=driver_url, options=options)
    wd.implicitly_wait(10)
    wd.get(course_url)