class crearCorros:
    """Drive the singlelogin.org registration form through Tor Browser.

    The constructor launches Tor Browser and builds the candidate e-mail
    addresses (``self.lista``) and passwords (``self.datosContrasenna``)
    from local text files, reading one candidate per line.
    """

    # A base character followed by combining marks; the marks are dropped.
    # The second alternative keeps a lone combining tilde after "n", so "ñ"
    # survives the stripping.
    _DIACRITICOS = re.compile(
        r"([^n\u0300-\u036f]|n(?!\u0303(?![\u0300-\u036f])))[\u0300-\u036f]+",
        re.I)

    @classmethod
    def _quitar_acentos(cls, texto):
        """Return *texto* without accents (keeping "ñ"), NFC-normalized."""
        sin_acentos = cls._DIACRITICOS.sub(r"\1", normalize("NFD", texto))
        return normalize('NFC', sin_acentos)

    def __init__(self):
        self.urlProtocoe = (
            'http://3g2upl4pq6kufc4m.onion',
            'https://mail.protonmail.com/create/new',
            'https://singlelogin.org/registration.php',
        )
        print(self.urlProtocoe[2])
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.protocoe = TorBrowserDriver(self.tbb_dir,
                                         tbb_logfile_path='test.log')
        self.dirNombre = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/nombre.txt'
        self.nombre = open(self.dirNombre, 'r+')
        self.dirapellido = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/apellidos.txt'
        self.apellido = open(self.dirapellido, 'r+')
        self.dirContrasenna = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña.txt'
        self.contrasenna = open(self.dirContrasenna, 'r+')
        self.dirCotrasenna2 = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña2.txt'
        self.Contrasenna2 = open(self.dirCotrasenna2, 'r+')
        self.datosContrasenna = []
        self.lista = []
        try:
            # 101 lines are read, but only the first 100 are post-processed
            # below; entry 100 keeps its raw form.
            for _ in range(101):
                self.lista.append(self.nombre.readline() + 'asdsdf')
                self.datosContrasenna.append(
                    self.contrasenna.readline() + "blabal")
        finally:
            # The files are only needed while building the lists; release
            # them instead of leaking the handles for the object's lifetime.
            for archivo in (self.nombre, self.apellido, self.contrasenna,
                            self.Contrasenna2):
                archivo.close()
        for indice in range(100):
            correo = self.lista[indice].replace('\n', 'asdaawderca')
            clave = self.datosContrasenna[indice].replace('\n', 'radabanals')
            self.lista[indice] = self._quitar_acentos(correo) + '@maildrop.cc'
            self.datosContrasenna[indice] = self._quitar_acentos(clave)

    def iniciarTor(self):
        """Load the registration page (third entry of ``urlProtocoe``)."""
        self.protocoe.load_url(self.urlProtocoe[2])

    def ingresarDatos(self, fila):
        """Type candidate number *fila* into the form and submit it.

        Random pauses are inserted between the individual actions.
        """
        self.eamil = self.protocoe.find_element_by_name('email')
        self.eamil.click()
        sleep(random.uniform(1.0, 4))
        self.eamil.send_keys(self.lista[fila])
        self.pasword = self.protocoe.find_element_by_name("password")
        self.pasword.click()
        sleep(random.uniform(1.0, 5))
        self.pasword.send_keys(self.datosContrasenna[fila])
        self.name = self.protocoe.find_element_by_name("name")
        sleep(random.uniform(1.0, 6))
        self.name.click()
        # The "name" field receives the same text as the password field.
        self.name.send_keys(self.datosContrasenna[fila])
        sleep(random.uniform(2.0, 8.7))
        self.name.send_keys(Keys.RETURN)

    def serrarTor(self):
        """Close the Tor Browser window."""
        self.protocoe.close()

    def imprimirDatos(self):
        """Print the 100 processed e-mail/password pairs."""
        # Disabled CSV export kept from the original:
        # self.dirscv='/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/emailFalsos/contraseñasYcorreos.csv'
        # self.datos=csv.writer(open(self.dirscv,'w'))
        for d in range(0, 100):
            # self.datos.writerow([self.lista[d]])
            # self.datos.writerow([self.datosContrasenna[d]])
            print(self.lista[d])
            print(self.datosContrasenna[d])
def up(name, ema, pas):
    """Fill in and submit the Udemy sign-up form through Tor Browser.

    Args:
        name: Text typed into the ``id_fullname`` field.
        ema: Text typed into the ``email--1`` field.
        pas: Text typed into the ``password`` field.

    Returns:
        True when the URL after submitting contains ``occupation`` or
        ``=1``; False otherwise.
    """
    # getElementsByClassName returns a collection, so the style must be set
    # on each element. The previous script accessed ``.sytle`` on the
    # collection itself and therefore always raised.
    hide_banner = (
        'Array.prototype.forEach.call('
        'document.getElementsByClassName("ot-sdk-container"), '
        'function (el) { el.style.display = "none"; })'
    )
    browser = TorBrowserDriver(tbb_dir, tor_cfg=cm.USE_STEM)
    try:
        # connect to site
        browser.load_url(
            "https://www.udemy.com/join/signup-popup/?locale=en_US&response_type=html&next=https%3A%2F%2Fwww.udemy.com%2F",
            wait_on_page=5,
            wait_for_page_body=True)
        # enter full name
        full_name = browser.find_element_by_id("id_fullname")
        full_name.send_keys(name)
        # enter email
        email_el = browser.find_element_by_id("email--1")
        email_el.send_keys(ema)
        # enter password
        pass_el = browser.find_element_by_id("password")
        pass_el.send_keys(pas)
        # Scroll
        browser.execute_script("window.scrollBy(0,200)")
        browser.execute_script(
            'document.getElementById("id_subscribe_to_emails").checked = false')
        # find submit link and click it
        sub_el = browser.find_element_by_id('submit-id-submit')
        sub_el.click()
        sleep(1)
        # check
        if 'occupation' in browser.current_url:
            sleep(3)
            try:
                # Best effort: a failure to hide the overlay is not fatal.
                browser.execute_script(hide_banner)
            except Exception:
                pass
            cl = browser.find_elements_by_class_name("udlite-btn")
            try:
                cl[0].click()
            except Exception:
                # Retry once after hiding the overlay again.
                browser.execute_script(hide_banner)
                cl[0].click()
            sleep(3)
            return True
        if '=1' in browser.current_url:
            return True
        return False
    finally:
        # Close the browser on every path, including errors and the
        # fall-through case that previously left it open.
        browser.close()
class TruliaHelper():
    """Search trulia.com through Tor Browser for addresses from a spreadsheet."""

    def __init__(self):
        self.url = 'https://www.trulia.com'
        # need to set the Tor Browser bundle path here.
        tbpath = "/home/XX/XXXX/tor-browser-linux64-8.0.8_en-US/tor-browser_en-US"
        self.driver = TorBrowserDriver(tbb_path=tbpath,
                                       tbb_logfile_path='test.log')
        # self.driver = webdriver.Firefox(firefox_profile=profile, firefox_binary=binary)
        # self.driver = webdriver.Chrome(executable_path='../utility/chromedriver.exe', chrome_options=chrome_options)

    @staticmethod
    def _format_query(keyword):
        """Join the address parts of one spreadsheet row into a search string."""
        return " ".join(str(part) for part in keyword)

    # method to get items from given link.
    def getItems(self):
        """Search every spreadsheet row and collect one detail dict per row.

        Returns:
            A list with the result of ``getItemDetail`` for each row. The
            driver is closed before returning, also when an error occurs.
        """
        df = pd.read_excel("/home/XXXXX/XXXXX/XXXXXX.xlsx")
        a = df['Site Address']
        b = df['Site City']
        c = df['Site State']
        d = df['Site Zip']
        items = []
        # keywords = ['512 W 10th St Perris CA 92570', 'New York, NY', 'San Francisco, CA', 'Washington, CA']
        try:
            for keyword in (pd.concat([a, b, c, d], axis=1)).values.tolist():
                self.driver.get(self.url)
                search_box = self.driver.find_element_by_id(
                    "homepageSearchBoxTextInput")
                search_box.clear()
                # ``keyword`` is a list of row values; str() of it would type
                # the Python list repr (brackets and quotes) into the box.
                search_box.send_keys(self._format_query(keyword))
                search_btn = self.driver.find_element_by_xpath(
                    "//button[@data-auto-test-id='searchButton']")
                if search_btn:
                    search_btn.click()
                time.sleep(10)
                items.append(self.getItemDetail())
        finally:
            self.driver.close()
        return items

    def getItemDetail(self):
        """Extract the price from the currently loaded page.

        Returns:
            ``{'price': <text>}`` when the price element is found, otherwise
            an empty dict (extraction is best effort).
        """
        data = {}
        try:
            soup = BeautifulSoup(self.driver.page_source, u'html.parser')
            # image = soup.find("div", attrs={"class": "Tiles__TileBackground-fk0fs3-0 cSObNX"}).find("img")["src"]
            price = soup.find("div", attrs={"class": "Text__TextBase-sc-1cait9d-0-div Text__TextContainerBase-sc-1cait9d-1 hlvKRM"}).text
            print(price)
            # Previously the price was only printed and ``data`` stayed empty.
            data['price'] = price
        except Exception:
            # Missing element or driver hiccup: keep going with empty data.
            pass
        return data

    # method to start process.
    def start(self):
        """Run the full search and print the collected items."""
        items = self.getItems()
        print("Items : ", items)
class TestSite(unittest.TestCase):
    """Check that Tor Browser reports a working Tor connection."""

    def setUp(self):
        """Start Tor Browser from the local tor-browser_en-US directory."""
        # Point the path to the tor-browser_en-US directory in your system
        bundle_dir = '/home/kdas/.local/tbb/tor-browser_en-US/'
        self.driver = TorBrowserDriver(bundle_dir, tbb_logfile_path='test.log')
        self.url = "https://check.torproject.org"

    def tearDown(self):
        """Close the browser at the end of each test."""
        self.driver.close()

    def test_available(self):
        """The check page must show the success message."""
        self.driver.load_url(self.url)
        # Find the element for success
        success_el = self.driver.find_element_by_class_name('on')
        shown_text = str.strip(success_el.text)
        expected_text = "Congratulations. This browser is configured to use Tor."
        self.assertEqual(shown_text, expected_text)
        sleep(2)  # So that we can see the page
class validarCuentas:
    """Open maildrop.cc inboxes through Tor Browser and follow a mail link.

    The inbox names (``self.email``) are built at construction time from a
    local text file, one name per line.
    """

    # A base character followed by combining marks; the marks are dropped.
    # The second alternative keeps a lone combining tilde after "n", so "ñ"
    # survives the stripping.
    _DIACRITICOS = re.compile(
        r"([^n\u0300-\u036f]|n(?!\u0303(?![\u0300-\u036f])))[\u0300-\u036f]+",
        re.I)

    @classmethod
    def _quitar_acentos(cls, texto):
        """Return *texto* without accents (keeping "ñ"), NFC-normalized."""
        sin_acentos = cls._DIACRITICOS.sub(r"\1", normalize("NFD", texto))
        return normalize('NFC', sin_acentos)

    def __init__(self):
        self.dirNombre = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/nombre.txt'
        self.email = []
        # The file is only needed while building the list, so it is closed
        # as soon as the lines are read instead of leaking the handle.
        with open(self.dirNombre, 'r+') as archivo:
            self.nombre = archivo
            # 101 lines are read, but only the first 100 are post-processed
            # below; entry 100 keeps its raw form.
            for _ in range(101):
                self.email.append(archivo.readline() + 'asdsdf')
        for indice in range(100):
            nombre = self.email[indice].replace('\n', 'asdaawderca')
            self.email[indice] = self._quitar_acentos(nombre)

    def iniciarTor(self, fila):
        """Launch Tor Browser and load maildrop.cc (*fila* is not used)."""
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.mailpro = TorBrowserDriver(self.tbb_dir,
                                        tbb_logfile_path='test.log')
        self.mailpro.load_url('https://maildrop.cc/')

    def ingresarDatos(self, fila):
        """Open inbox number *fila*, open a message and click a link in it.

        Elements are located by position (second ``<input>``, fifteenth
        ``<div>`` with a class, second ``<a>`` with an href inside the
        iframe), so this depends on the exact page layout.
        """
        self.pulsar = self.mailpro.find_elements_by_xpath('//input')[1]
        self.pulsar.send_keys(self.email[fila])
        self.pulsar.send_keys(Keys.RETURN)
        sleep(6)
        self.correo = self.mailpro.find_elements_by_xpath('//div[@class]')[14]
        self.correo.click()
        sleep(5)
        self.iframe = self.mailpro.find_element_by_tag_name('iframe')
        self.mailpro.switch_to.frame(self.iframe)
        print(self.mailpro.page_source)
        self.mailpro.find_elements_by_xpath('//a[@href]')[1].click()

    def serrarTor(self):
        """Close the Tor Browser window."""
        self.mailpro.close()

    def imprimirDatos(self):
        """Print the 100 processed inbox names."""
        for d in range(0, 100):
            print(self.email[d])
# NOTE(review): this class was reconstructed from a whitespace-collapsed
# source; statement nesting inside urlPdf is inferred and must be confirmed
# against the original file before relying on it.
class DescargarPdf:
    """Walk a search-result page through Tor Browser and fetch each book.

    State is kept on the instance: ``usuario`` / ``contraseñaTxT`` hold the
    credentials read by ``UsuariosYcontraseñas``, ``url`` the current result
    page and ``html`` its source.
    """

    def __init__(self):
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.usuario = []
        self.contraseñaTxT = []
        self.conversor = '?convertedTo=pdf'

    def iniciarTor(self):
        """Launch Tor Browser and keep the driver in ``self.zLibraty``."""
        self.zLibraty = TorBrowserDriver(self.tbb_dir, tbb_logfile_path='test.log')

    def iniciarSecion(self):
        """Type ``self.correo`` / ``self.contraseña`` into the login form."""
        self.element = self.zLibraty.find_element_by_name("email")
        self.element.send_keys(self.correo)
        sleep(2)
        # The password box is the second element with class "form-control".
        self.element2 = self.zLibraty.find_elements_by_class_name(
            "form-control")[1]
        self.element2.send_keys(self.contraseña)
        self.element2.send_keys(Keys.RETURN)

    def paginaDescargas(self):
        """Load ``self.url`` and store the page source in ``self.html``."""
        print("estoy en la funcion paginaDescagas")
        sleep(4)
        self.zLibraty.get(self.url)
        self.html = self.zLibraty.page_source

    def paginaPrinsipal(self, añoInicial, añoFinal):
        """Set ``self.url`` to the search URL for the given year range."""
        self.urlAños = 'http://zlibraryexau2g3p.onion/s/?yearFrom=' + str(
            añoInicial) + '&yearTo=' + str(añoFinal)
        self.url = self.urlAños

    def cambiarPagina(self, x):
        """Append ``&page=x`` to ``self.url`` (appends on every call)."""
        self.url += '&page=' + str(x)

    def Crearcsv(self):
        """Create the output folder and open the two CSV writers.

        Both files are opened in 'w' mode, so existing content is truncated.
        The file objects are not kept, so they are never closed explicitly.
        """
        self.carpetaUrl = '/home/dgc7/Documentos/zlibrary/libros1920-1921/url'
        try:
            os.mkdir(self.carpetaUrl)
        except OSError as e:
            # An already existing folder is fine; anything else is re-raised.
            if e.errno != errno.EEXIST:
                raise
        self.escrivirUrlWed = csv.writer(
            open('/home/dgc7/Documentos/zlibrary/libros1920-1921/url/url2.csv',
                 'w'))
        self.imprimirUrlPdf = csv.writer(
            open(
                '/home/dgc7/Documentos/zlibrary/libros1920-1921/url/urlDowload2.csv',
                'w'))

    def credenciales(self, numeroUsuario):
        """Select credential pair *numeroUsuario* and load the login URL."""
        print("llegue")
        self.correo = self.usuario[numeroUsuario]
        self.contraseña = self.contraseñaTxT[numeroUsuario]
        self.urlLoguin = 'http://zlibraryexau2g3p.onion'
        self.zLibraty.get(self.urlLoguin)

    def UsuariosYcontraseñas(self):
        """Read 200 lines from the credentials file.

        Even-numbered reads go to ``self.usuario``, odd-numbered reads to
        ``self.contraseñaTxT``. Lines keep their trailing newline and the
        file handle stays open in ``self.data``.
        """
        self.dir = '/home/dgc7/Documentos/zlibrary/credenciales/contraseñasYcorreos.txt'
        self.data = open(self.dir, 'r+')
        for self.i in range(0, 200):
            if self.i % 2 == 0:
                self.usuario.append(self.data.readline())
            if self.i % 2 != 0:
                self.contraseñaTxT.append(self.data.readline())

    def urlPdf(self, ):
        """Iterate the entries of ``self.html`` and download each book.

        ``contadorLibros`` is loaded from ``datosDescarga(4)`` and
        ``respaldoContador`` counts the entries seen on this page; an entry
        is downloaded only when both counters are equal. Every time
        ``contadorLibros2`` reaches a multiple of 10 the method sends
        keyboard shortcuts through pyautogui and logs in with the next
        credential pair.

        Raises:
            OSError: re-raised after the counters have been saved.
        """
        self.boleanoPdf = 0
        self.respaldoContador = 0
        self.contadorUsuarios = usuarioUsadosLeer()
        self.contadorLibros = datosDescarga(4)
        self.contadorLibros2 = self.contadorLibros % 10
        self.Crearcsv()
        self.soup = BeautifulSoup(self.html, 'html.parser')
        try:
            for self.urlwed in self.soup.find_all(itemprop="name"):
                self.contador = 0
                self.urlwed = self.urlwed.find('a', href=re.compile(''))
                self.urlDowload = self.urlwed.get('href')
                # Same relative link rewritten for two hosts.
                self.urlpdfGeleneralH = re.sub('/book/', 'https://b-ok.cc/book/',
                                               self.urlDowload)
                self.urlDowload = re.sub(
                    '/book/', 'http://zlibraryexau2g3p.onion/book/',
                    self.urlDowload)
                self.escrivirUrlWed.writerow([self.urlDowload])
                print(self.urlDowload)
                voleano = validarFormato(self.urlpdfGeleneralH)
                guardarNumeroDescargas(self.contadorLibros)
                print(self.respaldoContador)
                if self.contadorLibros == self.respaldoContador:
                    # Single-iteration loop.
                    for self.urlRedirec in range(0, 1):
                        self.zLibraty.get(self.urlDowload)
                        sleep(5)
                        self.htmlPdf = self.zLibraty.page_source
                        self.soupRedirec = BeautifulSoup(
                            self.htmlPdf, 'html.parser')
                        self.urlDowloadPDF = self.soupRedirec.find(
                            class_="btn btn-primary dlButton addDownloadedBook"
                        )
                        self.urlDowloadPDF = self.urlDowloadPDF.get('href')
                        self.urlDowloadPDF = re.sub(
                            '/dl/', 'http://zlibraryexau2g3p.onion/dl/',
                            self.urlDowloadPDF)
                        self.imprimirUrlPdf.writerow([self.urlDowloadPDF])
                        print(self.urlDowloadPDF)
                        print("vamos a por el if")
                        sleep(10)
                        if voleano == True:
                            self.zLibraty.set_page_load_timeout(8)
                            try:
                                self.zLibraty.get(self.urlDowloadPDF)
                            except:
                                # NOTE(review): extent of this except body is
                                # inferred from the collapsed source - confirm.
                                self.zLibraty.set_page_load_timeout(70)
                            self.zLibraty.refresh()
                            print("funciona PDF ")
                            voleano = False
                            sleep(5)
                            self.contadorLibros += 1
                            self.contadorLibros2 += 1
                        else:
                            try:
                                self.zLibraty.set_page_load_timeout(5)
                                try:
                                    self.zLibraty.get(self.urlDowloadPDF)
                                except:
                                    # Keyboard input sent through pyautogui
                                    # when the page load raises.
                                    sleep(4)
                                    pyautogui.press("down")
                                    sleep(2)
                                    pyautogui.press("enter")
                                # NOTE(review): placement after the inner
                                # try is inferred - confirm.
                                self.zLibraty.set_page_load_timeout(70)
                            except:
                                print(
                                    "\nerror al controlasr el teclado y dar enter\n"
                                )
                                raise
                            sleep(5)
                            self.zLibraty.refresh()
                            self.contadorLibros += 1
                            self.contadorLibros2 += 1
                        sleep(20)
                        tiempoDescarga()
                        informaiconPdf(self.urlpdfGeleneralH)
                self.respaldoContador += 1
                if self.contadorLibros == self.respaldoContador:
                    if self.contadorLibros2 % 10 == 0:
                        print((self.contadorLibros2 - 1) % 10)
                        self.contador += 1
                        # Keyboard shortcut sent to the focused window.
                        pyautogui.hotkey("ctrl", "shift", "u")
                        sleep(2)
                        pyautogui.press("enter")
                        sleep(7)
                        pyautogui.press("enter")
                        sleep(15)
                        self.contadorUsuarios += 1
                        print(self.contadorUsuarios)
                        try:
                            self.zLibraty.switch_to_window(
                                self.zLibraty.window_handles[0])
                        except:
                            print("error al cambian de ventana")
                        usuarioUsadosReescrivir(self.contadorUsuarios)
                        print("por aqui¿¿¿¿¿¿")
                        self.credenciales(self.contadorUsuarios)
                        print("no por aqui¿¿¿¿¿¿")
                        sleep(23)
                        self.iniciarSecion()
                        sleep(7)
                        self.contadorLibros2 = 0
                        sleep(15)
                        # NOTE(review): nesting of the next two statements
                        # is inferred - confirm.
                        print("numero de li bros por usuario ",
                              self.contadorLibros2)
                        if self.contador == 5:
                            self.contador = 0
        except OSError as e:
            # Save both counters before propagating the error.
            print(e.strerror)
            print("error en la urlPdf:::::")
            guardarNumeroDescargas(self.contadorLibros)
            usuarioUsadosReescrivir(self.contadorUsuarios)
            print(self.contadorLibros)
            raise
        print("termine la pagina")

    def DescargarContenido(self, _html):
        """Store *_html* in ``self.contenido``."""
        self.contenido = _html

    def serrarTor(self):
        """Close the Tor Browser window."""
        self.zLibraty.close()
class DescargarPdf:
    """Walk a search-result page through Tor Browser and fetch each book.

    State is kept on the instance: ``usuario`` / ``contraseñaTxT`` hold the
    credentials read by ``UsuariosYcontraseñas``, ``url`` the current result
    page and ``html`` its source.
    """

    def __init__(self):
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.usuario = []
        self.contraseñaTxT = []
        self.conversor = '?convertedTo=pdf'
        # File objects behind the CSV writers, tracked so they can be closed.
        self._archivosCsv = []

    def iniciarTor(self):
        """Launch Tor Browser and keep the driver in ``self.zLibraty``."""
        self.zLibraty = TorBrowserDriver(self.tbb_dir,
                                         tbb_logfile_path='test.log')

    def iniciarSecion(self):
        """Refresh the page and submit ``self.correo`` / ``self.contraseña``."""
        self.zLibraty.refresh()
        sleep(10)
        self.element = self.zLibraty.find_element_by_name("email")
        self.element.send_keys(self.correo)
        sleep(2)
        # The password box is the second element with class "form-control".
        self.element2 = self.zLibraty.find_elements_by_class_name(
            "form-control")[1]
        self.element2.send_keys(self.contraseña)
        self.element2.send_keys(Keys.RETURN)

    def paginaDescargas(self):
        """Load ``self.url`` and store the page source in ``self.html``."""
        self.zLibraty.load_url(self.url)
        self.html = self.zLibraty.page_source

    def paginaPrinsipal(self, añoInicial, añoFinal):
        """Set ``self.url`` to the search URL for the given year range."""
        self.urlAños = 'http://zlibraryexau2g3p.onion/s/?yearFrom=' + str(
            añoInicial) + '&yearTo=' + str(añoFinal)
        self.url = self.urlAños

    def cambiarPagina(self, x):
        """Append ``&page=x`` to ``self.url`` (appends on every call)."""
        self.url += '&page=' + str(x)

    def _cerrarCsv(self):
        """Close the files behind the CSV writers, flushing buffered rows."""
        for archivo in self._archivosCsv:
            archivo.close()
        self._archivosCsv = []

    def Crearcsv(self):
        """Create the output folder and open the two CSV writers.

        Both files are opened in 'w' mode, so existing content is truncated.
        Files left open by a previous call are closed first.
        """
        print("hola")
        self._cerrarCsv()
        self.carpetaUrl = '/home/dgc7/Documentos/zlibrary/libros1920-1921/url'
        try:
            os.mkdir(self.carpetaUrl)
        except OSError as e:
            # An already existing folder is fine; anything else is re-raised.
            if e.errno != errno.EEXIST:
                raise
        archivo_urls = open(
            '/home/dgc7/Documentos/zlibrary/libros1920-1921/url/url2.csv', 'w')
        self._archivosCsv.append(archivo_urls)
        archivo_descargas = open(
            '/home/dgc7/Documentos/zlibrary/libros1920-1921/url/urlDowload2.csv',
            'w')
        self._archivosCsv.append(archivo_descargas)
        self.escrivirUrlWed = csv.writer(archivo_urls)
        self.imprimirUrlPdf = csv.writer(archivo_descargas)

    def credenciales(self, numeroUsuario):
        """Select credential pair *numeroUsuario* and load the login URL."""
        self.correo = self.usuario[numeroUsuario]
        self.contraseña = self.contraseñaTxT[numeroUsuario]
        self.urlLoguin = 'http://zlibraryexau2g3p.onion'
        self.zLibraty.load_url(self.urlLoguin)

    def UsuariosYcontraseñas(self):
        """Read 200 lines from the credentials file.

        Even-numbered reads go to ``self.usuario``, odd-numbered reads to
        ``self.contraseñaTxT``. Lines keep their trailing newline. The file
        is closed once the lines are read.
        """
        self.dir = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/descargarLIbros/descargarparte1/contraseñasYcorreos.txt'
        with open(self.dir, 'r+') as archivo:
            self.data = archivo
            for indice in range(0, 200):
                destino = self.usuario if indice % 2 == 0 else self.contraseñaTxT
                destino.append(archivo.readline())

    def urlPdf(self, contador, _contadorusuarios):
        """Download every entry of ``self.html``, rotating accounts.

        After every 10 entries the browser is restarted and the next
        credential pair is used to log in.

        Args:
            contador: Not used; kept for interface compatibility.
            _contadorusuarios: Index of the credential pair currently in use.
        """
        self.boleanoPdf = 0
        self.contadorUsuariosCon = _contadorusuarios
        self.contadorLibros2 = 0
        self.contadorLibros = 0
        self.Crearcsv()
        self.soup = BeautifulSoup(self.html, 'html.parser')
        try:
            for entrada in self.soup.find_all(itemprop="name"):
                self.contador = 0
                self.urlwed = entrada.find('a', href=re.compile(''))
                self.urlDowload = self.urlwed.get('href')
                # Same relative link rewritten for two hosts.
                self.urlpdfGeleneralH = re.sub(
                    '/book/', 'https://b-ok.cc/book/', self.urlDowload)
                self.urlDowload = re.sub(
                    '/book/', 'http://zlibraryexau2g3p.onion/book/',
                    self.urlDowload)
                self.escrivirUrlWed.writerow([self.urlDowload])
                print(self.urlDowload)
                self.contadorLibros += 1
                self.contadorLibros2 += 1
                if self.contadorLibros2 == 10:
                    self.contador += 1
                    self.serrarTor()
                    sleep(4)
                    self.iniciarTor()
                    self.contadorUsuariosCon += 1
                    print(self.contadorUsuariosCon)
                    # The original passed the undefined name
                    # ``contadorusuarios`` here, raising NameError on the
                    # first rotation; the incremented index is what is meant.
                    self.credenciales(self.contadorUsuariosCon)
                    self.iniciarSecion()
                    sleep(7)
                    self.contadorLibros2 = 0
                    sleep(15)
                    if self.contador == 5:
                        self.contador = 0
                voleano = validarFormato(self.urlpdfGeleneralH)
                # Single-iteration loop kept from the original.
                for self.urlRedirec in range(0, 1):
                    self.zLibraty.load_url(self.urlDowload)
                    sleep(5)
                    self.htmlPdf = self.zLibraty.page_source
                    self.soupRedirec = BeautifulSoup(self.htmlPdf,
                                                     'html.parser')
                    self.urlDowloadPDF = self.soupRedirec.find(
                        class_="btn btn-primary dlButton addDownloadedBook")
                    self.urlDowloadPDF = self.urlDowloadPDF.get('href')
                    self.urlDowloadPDF = re.sub(
                        '/dl/', 'http://zlibraryexau2g3p.onion/dl/',
                        self.urlDowloadPDF)
                    self.imprimirUrlPdf.writerow([self.urlDowloadPDF])
                    print(self.urlDowloadPDF)
                    if voleano == True:
                        self.zLibraty.get(self.urlDowloadPDF)
                        voleano = False
                    else:
                        # Request the converted variant of the download.
                        self.convertirpdf = str(self.urlDowloadPDF) + str(
                            self.conversor)
                        self.zLibraty.get(self.convertirpdf)
                    sleep(20)
                    tiempoDescarga()
                    # NOTE(review): helper name is spelled ``informaiconPDf``
                    # here; confirm it matches the helper's definition.
                    informaiconPDf(self.urlpdfGeleneralH)
        finally:
            # Flush and release the CSV files even when a download fails.
            self._cerrarCsv()

    def DescargarContenido(self, _html):
        """Store *_html* in ``self.contenido``."""
        self.contenido = _html

    def serrarTor(self):
        """Close the Tor Browser window."""
        self.zLibraty.close()
# Load a Google results page with the already-created ``driver``, print the
# first element with class 'LC20lb', then close the browser.

# url = "https://check.torproject.org"
url = "https://www.google.com/search?q=playoffs"

# Built but not passed to the driver below.
headers = {
    "User-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
}

driver.load_url(url)
# driver.context()

# Find the element for success
element = driver.find_element_by_class_name('LC20lb')
print(element)

# So that we can see the page
sleep(2)
driver.close()

# Disabled unittest variant kept from the original:
# class TestSite(unittest.TestCase):
#     def setUp(self):
#         # Point the path to the tor-browser_en-US directory in your system
#         tbpath = '/home/andrew/Desktop/tor-browser-linux64-8.0.8_en-US/tor-browser_en-US/'
#         self.driver = TorBrowserDriver(tbpath, tbb_logfile_path='test.log', tor_cfg=cm.USE_STEM)
#         self.url = "https://check.torproject.org"
#     def tearDown(self):
#         # We want the browser to close at the end of each test.
#         self.driver.close()
#     def test_available(self):
#         self.driver.load_url(self.url)
class crearCorros:
    """Drive a sign-up form through Tor Browser.

    The constructor launches Tor Browser and builds the candidate user
    names (``self.lista``) and passwords (``self.datosContrasenna``) from
    local text files, reading one candidate per line.
    """

    # A base character followed by combining marks; the marks are dropped.
    # The second alternative keeps a lone combining tilde after "n", so "ñ"
    # survives the stripping.
    _DIACRITICOS = re.compile(
        r"([^n\u0300-\u036f]|n(?!\u0303(?![\u0300-\u036f])))[\u0300-\u036f]+",
        re.I)

    @classmethod
    def _quitar_acentos(cls, texto):
        """Return *texto* without accents (keeping "ñ"), NFC-normalized."""
        sin_acentos = cls._DIACRITICOS.sub(r"\1", normalize("NFD", texto))
        return normalize('NFC', sin_acentos)

    def __init__(self):
        self.urlProtocoe = (
            'http://3g2upl4pq6kufc4m.onion',
            'https://mail.protonmail.com/create/new',
            'https://singlelogin.org/registration.php',
        )
        print(self.urlProtocoe[2])
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.protocoe = TorBrowserDriver(self.tbb_dir,
                                         tbb_logfile_path='test.log')
        self.dirNombre = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/nombre.txt'
        self.nombre = open(self.dirNombre, 'r+')
        self.dirapellido = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/apellidos.txt'
        self.apellido = open(self.dirapellido, 'r+')
        self.dirContrasenna = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña.txt'
        self.contrasenna = open(self.dirContrasenna, 'r+')
        self.dirCotrasenna2 = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña2.txt'
        self.Contrasenna2 = open(self.dirCotrasenna2, 'r+')
        self.datosContrasenna = []
        self.lista = []
        try:
            # 101 lines are read, but only the first 100 are post-processed
            # below; entry 100 keeps its raw form.
            for _ in range(101):
                self.lista.append(self.nombre.readline() + 'asdsdf')
                # A password is one line from each of the two password files.
                self.datosContrasenna.append(
                    self.contrasenna.readline() + self.Contrasenna2.readline())
        finally:
            # The files are only needed while building the lists; release
            # them instead of leaking the handles for the object's lifetime.
            for archivo in (self.nombre, self.apellido, self.contrasenna,
                            self.Contrasenna2):
                archivo.close()
        for indice in range(100):
            usuario = self.lista[indice].replace('\n', 'asdaawderca')
            clave = self.datosContrasenna[indice].replace('\n', 'radabanals')
            self.lista[indice] = self._quitar_acentos(usuario)
            self.datosContrasenna[indice] = self._quitar_acentos(clave)

    def iniciarTor(self):
        """Load the third entry of ``urlProtocoe`` in the browser."""
        self.protocoe.load_url(self.urlProtocoe[2])

    def ingresarDatos(self, fila):
        """Type candidate number *fila* into the form and confirm it.

        The password is entered twice ("password" and "passwordc"), the
        user name goes into the first ``<input>`` inside the page's iframe,
        and finally the primary modal button is clicked. Random pauses are
        inserted between the individual actions.
        """
        self.pasword = self.protocoe.find_element_by_name("password")
        self.pasword.click()
        sleep(random.uniform(1.0, 4))
        self.pasword.send_keys(self.datosContrasenna[fila])
        self.pasword = self.protocoe.find_element_by_name("passwordc")
        sleep(random.uniform(1.0, 3))
        self.pasword.click()
        self.pasword.send_keys(self.datosContrasenna[fila])
        sleep(random.uniform(2.0, 5.7))
        self.iframes = self.protocoe.find_element_by_tag_name("iframe")
        self.protocoe.switch_to.frame(self.iframes)
        self.usuario = self.protocoe.find_element_by_xpath('//input')
        self.usuario.click()
        self.usuario.send_keys(self.lista[fila])
        sleep(random.uniform(0, 5))
        self.usuario.send_keys(Keys.ENTER)
        # Leave the iframe before looking for the confirmation button.
        self.protocoe.switch_to.default_content()
        sleep(20)
        self.enter = self.protocoe.find_element_by_xpath(
            '//button[@class="pm_button primary modal-footer-button"]')
        self.enter.click()

    def serrarTor(self):
        """Close the Tor Browser window."""
        self.protocoe.close()

    def imprimirDatos(self):
        """Print the 100 processed user-name/password pairs."""
        for d in range(0, 100):
            print(self.lista[d])
            print(self.datosContrasenna[d])
class Browser:
    """Selenium driver wrapper that starts a browser with a given PET.

    ``self.driver`` holds the Selenium driver; when ``env_type`` is "vm" an
    Xvfb virtual display is started first and kept in ``self.vdisplay``.
    """

    @staticmethod
    def _setup_socks5_proxy(browser, profile, proxy_setting):
        """Configure a socks5 proxy on chrome (brave) or firefox.

        Args:
            browser: "chrome" or "firefox"; other values are ignored.
            profile: Chrome options object or Firefox profile to configure.
            proxy_setting: Dict with "address", "port" and "bypass-list",
                or None to leave the profile untouched.
        """
        if proxy_setting is None:
            return
        address = proxy_setting["address"]
        port = proxy_setting["port"]
        bypass_list = proxy_setting["bypass-list"]
        if browser == "chrome":
            # https://sordidfellow.wordpress.com/2015/05/21/ssh-tunnel-for-chrome/
            profile.add_argument("--proxy-server=socks5://%s:%s" % (address, port))
            profile.add_argument("--proxy-bypass-list=%s" % bypass_list)
            print("socks5 proxy configured on chrome")
        elif browser == "firefox":
            # https://developer.mozilla.org/en-US/docs/Mozilla/Preferences/Mozilla_networking_preferences
            profile.set_preference("network.proxy.type", 1)
            profile.set_preference("network.proxy.socks", address)
            profile.set_preference("network.proxy.socks_port", port)
            profile.set_preference("network.proxy.socks_version", 5)
            # Boolean preference: the string "true" has the wrong type for
            # it, so remote DNS would not actually be switched on.
            profile.set_preference("network.proxy.socks_remote_dns", True)
            profile.set_preference("network.proxy.no_proxies_on", bypass_list)
            print("socks5 proxy configured on firefox")

    def __init__(self, config, browser, pet, env_type, proxy_setting):
        """Start the browser selected by *browser* / *pet*.

        Args:
            config: Passed to ``get_display_parameters`` in "vm" mode.
            browser: "firefox", "chrome" or "safari".
            pet: PET name; "brave" and "tor" have dedicated start-up paths.
            env_type: "vm" starts an Xvfb display before the browser.
            proxy_setting: Optional socks5 settings, see
                ``_setup_socks5_proxy``.
        """
        self.env_type = env_type
        # If the program is run in a virtual machine, xvfbwrapper has to
        # get installed first.
        if env_type == "vm":
            print("xvfb")
            from xvfbwrapper import Xvfb
            width, height, depth = get_display_parameters(config)
            self.vdisplay = Xvfb(width=width, height=height, colordepth=depth)
            self.vdisplay.start()

        print("Browser:", browser, "PET:", pet)
        pet_config = PetConfig()

        if pet == "brave":
            print("brave")
            chrome_options = ChromeOptions()
            bPath, dPath = pet_config.getPetBrowserDriverPath(pet, browser, env_type)
            print(bPath, dPath)
            chromedriver = dPath
            chrome_options.binary_location = bPath
            self._setup_socks5_proxy("chrome", chrome_options, proxy_setting)
            os.environ["webdriver.chrome.driver"] = chromedriver
            if env_type == "vm":
                chrome_options.add_argument("--no-sandbox")
            self.driver = webdriver.Chrome(executable_path=chromedriver,
                                           chrome_options=chrome_options)
            press_enter(1)
            return
        elif pet == "tor":
            plt = platform.system().lower()
            if plt == "darwin" or plt == "windows":
                # https://stackoverflow.com/questions/15316304/open-tor-browser-with-selenium
                print("native tor")
                bPath, dPath = pet_config.getPetBrowserDriverPath(pet, browser, env_type)
                print(bPath, dPath)
                profile = FirefoxProfile()
                profile.set_preference("network.proxy.type", 0)
                binary = FirefoxBinary(bPath)
                self.driver = webdriver.Firefox(firefox_profile=profile,
                                                firefox_binary=binary,
                                                executable_path=dPath)
            elif plt == "linux":
                # https://medium.com/@manivannan_data/selenium-with-tor-browser-using-python-7b3606b8c55c
                print("vm tor")
                from tbselenium.tbdriver import TorBrowserDriver
                pref_dict = {"network.proxy.no_proxies_on": "http://10.0.2.2/, http://192.168.4.204/"}
                self.driver = TorBrowserDriver(os.environ['TBB_PATH'],
                                               pref_dict=pref_dict)
            return

        aPath, bPath, dPath, pref = pet_config.getPetBrowserDriverPath(pet, browser, env_type)
        if browser == "firefox":
            fp = FirefoxProfile()
            self._setup_socks5_proxy("firefox", fp, proxy_setting)
            binary = FirefoxBinary(bPath)
            if pref is not None:
                fp.set_preference(pref[0], pref[1])
            self.driver = webdriver.Firefox(firefox_profile=fp,
                                            firefox_binary=binary,
                                            executable_path=dPath)
            if aPath:
                self.driver.install_addon(aPath)
        elif browser == "chrome":
            # https://github.com/SeleniumHQ/selenium/issues/5966
            chrome_options = webdriver.ChromeOptions()
            self._setup_socks5_proxy("chrome", chrome_options, proxy_setting)
            if aPath:
                chrome_options.add_extension(aPath)
            if pref is not None:
                chrome_options.add_experimental_option(pref[0], pref[1])
            chrome_options.binary_location = bPath
            os.environ["webdriver.chrome.driver"] = dPath
            time.sleep(1)
            self.driver = webdriver.Chrome(executable_path=dPath,
                                           chrome_options=chrome_options)
            # to escape the alert chrome display on first visit
            time.sleep(1)
            press_enter(1)
        elif browser == "safari":
            self.driver = webdriver.Safari()
        else:
            print("Unsupported Browser")
            sys.exit(0)

    def quit(self):
        """Shut the browser down and stop the virtual display, if any."""
        try:
            self.driver.quit()
        except Exception:
            self.driver.close()  # for Tor
        if self.env_type == "vm":
            self.vdisplay.stop()

    def visit_sites(self, site_list, delay=5):
        """Visits all pages in site_list with delay"""
        for site in site_list:
            sys.stdout.write(".")
            sys.stdout.flush()
            try:
                self.driver.get(site)
                time.sleep(delay)
            except Exception:
                # Report and continue with the next site; KeyboardInterrupt
                # is no longer swallowed, so a run can be aborted.
                print("Unexpected error:", sys.exc_info()[0])
class UntitledTestCase(unittest.TestCase):
    """Run every Lemonade workflow in ``workflow_ids`` and report failures.

    Results are accumulated in the module-level
    ``workflow_message_error_warning`` and ``workflow_message_completed``
    and mailed from ``tearDown``.
    """

    def setUp(self):
        """Start a virtual display and Tor Browser."""
        # Function-call form works as a statement on Python 2 as well.
        print('Loading...')
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.driver = TorBrowserDriver(
            '/scratch/zilton/troll/tor-browser_pt-BR/',
            tbb_logfile_path='test.log')
        # self.driver = webdriver.Chrome('chromium-browser')
        self.base_url = "https://lemonade.ctweb.inweb.org.br/#/workflows/1/"
        self.verificationErrors = []
        self.accept_next_alert = True

    def is_visible(self, locator, timeout=20):
        """Return True if element id *locator* becomes visible in time."""
        try:
            ui.WebDriverWait(self.driver, timeout).until(
                ec.visibility_of_element_located((By.ID, locator)))
            return True
        except TimeoutException:
            return False

    def is_not_visible(self, locator, timeout=2):
        """Return True if element id *locator* stops being visible in time."""
        try:
            ui.WebDriverWait(self.driver, timeout).until_not(
                ec.visibility_of_element_located((By.ID, locator)))
            return True
        except TimeoutException:
            return False

    def test_untitled_test_case(self):
        """Log in, execute each workflow and record its final status."""
        global workflow_message_error_warning, workflow_message_completed
        driver = self.driver

        # Login
        driver.get("https://lemonade.ctweb.inweb.org.br/#/login")
        driver.find_element_by_xpath("//input[@type='email']").clear()
        driver.find_element_by_xpath("//input[@type='email']").send_keys(
            lemonade_login)
        driver.find_element_by_xpath("//input[@type='password']").clear()
        driver.find_element_by_xpath("//input[@type='password']").send_keys(
            lemonade_password)
        driver.find_element_by_xpath("//button[@type='submit']").click()
        time.sleep(LOAD_TIME)

        count_progress = 1.0
        length = len(workflow_ids)
        index = 0
        count_problem = 1
        while index < length:
            workflow_id = workflow_ids[index]

            # Access the page of the workflow
            url = self.base_url + str(workflow_id)
            driver.get(url)

            # Execute the workflow: retry until both buttons can be clicked.
            while True:
                try:
                    time.sleep(LOAD_TIME * 0.2)
                    driver.find_element_by_id("tlb-execute-wf").click()
                    break
                except Exception:
                    pass
            while True:
                try:
                    time.sleep(LOAD_TIME * 0.2)
                    driver.find_element_by_id("mdl-execute-wf").click()
                    break
                except Exception:
                    pass

            # Monitoring the status of the execution
            time.sleep(LOAD_TIME)
            status = WAITING_MSG
            current_url = driver.current_url

            # Workflow with problem: being sent back to the root page means
            # the run did not start, so retry the same workflow.
            if current_url == "https://lemonade.ctweb.inweb.org.br/#/" and count_problem < MAX_LOAD_PROBLEM:
                count_problem += 1
                continue
            elif count_problem == MAX_LOAD_PROBLEM:
                status = WARNING_MSG

            # Compare by value: after the first poll ``status`` is a new
            # string object, so an identity test against WAITING_MSG would
            # end the polling while the job is still waiting.
            while (status == WAITING_MSG) or (status == RUNNING_MSG):
                while True:
                    try:
                        status = str(
                            driver.find_element_by_id("dtl-job-status").
                            get_attribute(name='title').upper())
                        if status:
                            break
                        time.sleep(LOAD_TIME * 0.2)
                    except Exception:
                        pass
                if (status == WAITING_MSG) or (status == RUNNING_MSG):
                    driver.refresh()
                    time.sleep(LOAD_TIME)

            # Main message after the execution ends
            message = ''
            if status != WARNING_MSG:
                while message == '':
                    try:
                        message = driver.find_element_by_id(
                            "dtl-job-status-text").text
                        break
                    except Exception:
                        pass
                    # NOTE(review): placement of the refresh inside the
                    # retry loop is inferred from a collapsed source.
                    driver.refresh()
                    time.sleep(LOAD_TIME)

            workflow_name = ''
            while count_problem < MAX_LOAD_PROBLEM:
                try:
                    time.sleep(LOAD_TIME * 0.2)
                    workflow_name = driver.find_element_by_xpath(
                        "//a[contains(@href, '#/workflows/1/%s')]" %
                        workflow_id).text
                    break
                except Exception:
                    pass

            if status == WARNING_MSG:
                message += ' - The execution presented an atypical problem. ' \
                           'Please check the workflow and the correct ' \
                           'update of the messages on the Lemonade page.'

            msg_dict = {
                'workflow_name': workflow_name,
                'workflow_id': workflow_id,
                'message': message,
                'status': status,
                'url': url
            }
            if status != COMPLETED_MSG:
                workflow_message_error_warning.append(msg_dict)
            else:
                workflow_message_completed += " " + workflow_id

            UntitledTestCase.update_progress(
                job_title='Testing Lemonade workflow: ',
                progress=count_progress)
            count_progress += 1
            index += 1
            count_problem = 1

        self.driver.close()

    @staticmethod
    def update_progress(job_title, progress):
        """Write a one-line progress bar for *progress* finished workflows."""
        global workflow_ids
        length = len(workflow_ids)
        progress = progress / length
        block = int(round(length * progress))
        message = "\r{0}: [{1}] {2}%".format(
            job_title,
            ', '.join(workflow_ids[:int(progress * length)]) + "-" *
            (length - block), round(progress * 100, 2))
        if progress >= 1:
            message += " DONE\r\n"
        sys.stdout.write(message)
        sys.stdout.flush()

    def is_element_present(self, how, what):
        """Return True if an element matching (*how*, *what*) exists."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        """Return True if an alert can be switched to."""
        try:
            self.driver.switch_to_alert()
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        """Accept or dismiss the current alert and return its text."""
        try:
            alert = self.driver.switch_to_alert()
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        """Send the report, shut everything down and check for errors."""
        UntitledTestCase.sendEmail()
        self.driver.quit()
        self.display.stop()
        self.assertEqual([], self.verificationErrors)

    @staticmethod
    def sendEmail():
        """Mail a status report; nothing is sent when no workflow failed."""
        global workflow_message_error_warning, workflow_message_completed
        if len(workflow_message_error_warning) > 0:
            workflow_message_completed = re.sub(r"^\s+|\s+$", "",
                                                workflow_message_completed)
            message = 'WORKFLOWS THAT PERFORMED CORRECTLY: %s' % (
                workflow_message_completed.replace(' ', ', '))
            message += '\n\nWORKFLOWS THAT DID NOT RUN SUCCESSFULLY:\n'
            for m in workflow_message_error_warning:
                # The name is left empty for warnings, so the id is used.
                if m['status'] == WARNING_MSG:
                    message += '\n- WORKFLOW: %s' % m['workflow_id']
                else:
                    message += '\n- WORKFLOW: %s' % m['workflow_name']
                message += '\n\tSTATUS: %s' % m['status']
                message += '\n\tMESSAGE: %s' % m['message']
                message += '\n\tURL: %s' % m['url']
                message += '\n___________________________\n'
            subject = "[LEMONADE] - Automatic Test for Workflows"
            email_sender.main(message_status_report=message.encode('utf-8'),
                              subject=subject)
def makeRequest(url, domain):
    """
    Makes HTTP request to url given as argument, after changing IP.

    Args:
        url: Address loaded in Tor Browser.
        domain: Label written to the log entry for this request.

    An entry is appended to ``logfile_name`` after the page was loaded; the
    status code in the entry is always 200 because the browser does not
    expose the real one here.
    """
    import time

    # The log file is opened first (as before) and is closed on every path.
    with open(logfile_name, 'a') as f:
        print('Changing IP...\n')

        # Method A (disabled) used the requests library without opening a
        # real TOR browser. Method B is used instead, which opens a real
        # browser, so that JS code is executed and Google Analytics tracks
        # us as a real user. Method A, kept for reference:
        #
        #   # Resetting IP
        #   tr.reset_identity()
        #   # This command restarts tor service, resulting in IP address
        #   # change. After '-p' flag insert user password.
        #   #os.system('sudo systemctl restart tor -p 0000')
        #   # Creating empty session object
        #   session = requests.session()
        #   session.proxies = {}
        #   # Adding proxies to session
        #   session.proxies['http'] = 'socks5h://localhost:9050'
        #   session.proxies['https'] = 'socks5h://localhost:9050'
        #   # Changing request headers
        #   headers = {}
        #   headers['User-agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
        #   print('Request headers were set.\n')
        #   new_ip = session.get('http://ipecho.net/plain').text
        #   # Executing request and assigning response status code
        #   status_code = session.get(url).status_code

        # Method B, using complete TOR Browser
        driver = TorBrowserDriver("/home/manos/Desktop/tor-browser_en-US")
        try:
            # driver.get('https://ipecho.net/plain')
            # new_ip = driver.find_element_by_tag_name('body').text
            checkConn()
            driver.get(url)
            time.sleep(2.0)
        finally:
            # Do not leave a browser running when the request fails.
            driver.close()

        # Request logging. A single timestamp is taken so date and time
        # cannot straddle midnight, and the ``time`` module is not rebound.
        now_text = str(datetime.datetime.now())
        timestamp = 'Date: ' + now_text[0:10] + '\nTime: ' + now_text[11:19]
        f.write(
            timestamp + '\nDomain: ' + domain + '\n'
            'Request sent to ' + url + '.' + '\nResponse status code: ' +
            str(200) +
            '\n*******************************************************************************************\n\n'
        )
    os.system('clear')
class DescargarPdf:
    """Automate a Tor Browser session that logs in and downloads books.

    The class drives a ``TorBrowserDriver`` instance (``self.zLibraty``),
    signs in through singlelogin.org with credentials read from a text
    file, walks a search-result page of the ``zlibraryexau2g3p.onion``
    mirror and triggers the download of every listed book, watching
    ``about:downloads`` until each download finishes.

    NOTE(review): this block was recovered from a source whose indentation
    had been lost. The nesting of ``credenciales``, ``urlPdf`` (and its
    helpers) and ``peticiones`` is a reconstruction from the statement
    order and must be confirmed against the original file.
    """

    def __init__(self):
        # 0 or 20 makes ``credenciales`` reopen the login site first.
        self.contadorCredenciales = 0
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        # Parallel lists filled by ``UsuariosYcontraseñas``.
        self.usuario = []
        self.contraseñaTxT = []
        self.conversor = '?convertedTo=pdf'

    def iniciarTor(self):
        """Start the Tor Browser and keep the driver in ``self.zLibraty``."""
        self.zLibraty = TorBrowserDriver(self.tbb_dir, tbb_logfile_path='test.log')

    def iniciarSecion(self):
        """Fill in the login form with ``self.correo`` / ``self.contraseña``.

        The password goes into the second element with class
        ``form-control`` and the form is submitted with RETURN.
        """
        self.element = self.zLibraty.find_element_by_name("email")
        self.element.send_keys(self.correo)
        sleep(2)
        self.element2 = self.zLibraty.find_elements_by_class_name("form-control")[1]
        self.element2.send_keys(self.contraseña)
        self.element2.send_keys(Keys.RETURN)

    def paginaDescargas(self):
        """Load ``self.url`` and store the page source in ``self.html``."""
        print("estoy en la funcion paginaDescagas")
        self.zLibraty.load_url(self.url)
        sleep(4)
        self.html = self.zLibraty.page_source

    def paginaPrinsipal(self, añoInicial, añoFinal):
        """Set ``self.url`` to the search URL for the given year range."""
        self.urlAños = (
            'http://zlibraryexau2g3p.onion/s/?yearFrom=' + str(añoInicial)
            + '&yearTo=' + str(añoFinal)
        )
        self.url = self.urlAños

    def cambiarPagina(self, x):
        """Append ``&page=<x>`` to ``self.url``.

        The parameter is appended to whatever ``self.url`` currently holds;
        earlier ``&page=`` parameters are not removed.
        """
        print("estoy en cambiar pagina prinsipal")
        self.url += '&page=' + str(x)
        print(self.url)

    def _cerrarCsv(self):
        """Close the CSV files opened by a previous ``Crearcsv`` call."""
        for nombre in ('_archivoUrlWed', '_archivoUrlPdf'):
            archivo = getattr(self, nombre, None)
            if archivo is not None and not archivo.closed:
                archivo.close()

    def Crearcsv(self):
        """Create the ``url`` folder and (re)open the two CSV writers.

        ``self.escrivirUrlWed`` writes to ``url2.csv`` and
        ``self.imprimirUrlPdf`` writes to ``urlDowload2.csv``. Both files
        are opened in ``'w'`` mode, so existing content is discarded.
        """
        desde = datosDescarga(1)
        asta = datosDescarga(2)
        self.carpetaUrl = (
            '/home/dd/Documentos/zlibrary/libros' + str(desde) + '-' + str(asta) + '/url'
        )
        os.makedirs(self.carpetaUrl, exist_ok=True)

        # Close handles from an earlier call so their buffered rows are
        # flushed before the same files are truncated again.
        self._cerrarCsv()
        # newline='' is what the csv module expects for files it writes.
        self._archivoUrlWed = open(self.carpetaUrl + '/url2.csv', 'w', newline='')
        self._archivoUrlPdf = open(self.carpetaUrl + '/urlDowload2.csv', 'w', newline='')
        self.escrivirUrlWed = csv.writer(self._archivoUrlWed)
        self.imprimirUrlPdf = csv.writer(self._archivoUrlPdf)

    def credenciales(self, numeroUsuario):
        """Select the credentials at index ``numeroUsuario``.

        When ``self.contadorCredenciales`` is 0 or 20 the login site is
        opened first and the ``redirectToHost`` control is operated with
        simulated key presses.

        Raises:
            IndexError: if ``numeroUsuario`` is outside the loaded lists.
        """
        print("llegue")
        if self.contadorCredenciales == 0 or self.contadorCredenciales == 20:
            self.zLibraty.load_url("https://singlelogin.org/")
            self.zLibraty.find_element_by_name("redirectToHost").click()
            sleep(3)
            pyautogui.press("down")
            sleep(2)
            pyautogui.press("down")
            sleep(1)
            pyautogui.press("enter")
            sleep(5)
        self.correo = self.usuario[numeroUsuario]
        self.contraseña = self.contraseñaTxT[numeroUsuario]

    def UsuariosYcontraseñas(self, ruta=None):
        """Load 100 user/password pairs from a text file.

        Lines alternate: user, password, user, password, ... Exactly 100
        entries are appended to each of ``self.usuario`` and
        ``self.contraseñaTxT``; once the file is exhausted the remaining
        entries are empty strings. Lines keep their trailing newline.

        Args:
            ruta: Optional path of the credentials file. Defaults to the
                fixed path used by this project.
        """
        if ruta is None:
            ruta = '/home/dd/Documentos/zlibrary/credenciales/contraseñasYcorreos.txt'
        self.dir = ruta
        # Read-only access is enough, and ``with`` closes the file.
        with open(self.dir, 'r') as datos:
            for _ in range(100):
                self.usuario.append(datos.readline())
                self.contraseñaTxT.append(datos.readline())

    def urlPdf(self,):
        """Download every book listed in ``self.html``.

        Books are counted with ``self.respaldoContador``; a book is only
        downloaded when that counter equals ``self.contadorLibros`` (the
        number stored by ``guardarNumeroDescargas``), which skips books
        handled in an earlier run. Every time ``self.contadorLibros2``
        reaches a multiple of ten the account is switched.

        An ``OSError`` raised while processing the page is reported, the
        counters are persisted and the download page is checked once more.
        """
        self.contadorCredenciales = 1
        self.boleanoPdf = 0
        self.respaldoContador = 0
        self.contadorUsuarios = usuarioUsadosLeer()
        self.contadorLibros = datosDescarga(4)
        self.contadorLibros2 = self.contadorLibros % 10
        self.Crearcsv()
        self.soup = BeautifulSoup(self.html, 'html.parser')
        try:
            for entrada in self.soup.find_all(itemprop="name"):
                self.contador = 0
                self.urlwed = entrada.find('a', href=re.compile(''))
                self.urlDowload = self.urlwed.get('href')
                self.urlpdfGeleneralH = re.sub('/book/', 'https://b-ok.cc/book/', self.urlDowload)
                self.urlDowload = re.sub('/book/', 'http://zlibraryexau2g3p.onion/book/', self.urlDowload)
                self.escrivirUrlWed.writerow([self.urlDowload])
                print(self.urlDowload)
                self.voleano = validarFormato(self.urlpdfGeleneralH)
                guardarNumeroDescargas(self.contadorLibros)
                print(self.respaldoContador)
                if self.contadorLibros == self.respaldoContador:
                    self._descargarLibro()
                self.respaldoContador += 1
                if self.contadorLibros == self.respaldoContador:
                    if self.contadorLibros2 % 10 == 0:
                        print((self.contadorLibros2 - 1) % 10)
                        self.contador += 1
                        self._cambiarUsuario()
                    print("numero de li bros por usuario ", self.contadorLibros2)
                    if self.contador == 5:
                        self.contador = 0
        except OSError as e:
            print(e.strerror)
            print("error en la urlPdf:::::")
            guardarNumeroDescargas(self.contadorLibros)
            usuarioUsadosReescrivir(self.contadorUsuarios)
            print(self.contadorLibros)
            archivos = int(contarNueroArchivos())
            print(archivos)
            self.zLibraty.load_url("about:downloads")
            self.datosEsperaDescarga()
            self.peticiones()
            self.zLibraty.back()
            informaiconPdf(self.urlpdfGeleneralH)

    def _descargarLibro(self):
        """Open the current book page, trigger its download and wait for it."""
        self.zLibraty.load_url(self.urlDowload)
        sleep(5)
        self.htmlPdf = self.zLibraty.page_source
        self.soupRedirec = BeautifulSoup(self.htmlPdf, 'html.parser')
        self.urlDowloadPDF = self.soupRedirec.find(class_="btn btn-primary dlButton addDownloadedBook")
        self.urlDowloadPDF = self.urlDowloadPDF.get('href')
        self.urlDowloadPDF = re.sub('/dl/', 'http://zlibraryexau2g3p.onion/dl/', self.urlDowloadPDF)
        self.imprimirUrlPdf.writerow([self.urlDowloadPDF])
        print(self.urlDowloadPDF)
        print("vamos a por el if")
        sleep(15)
        # The page-load timeout is shortened around the download request
        # and any exception from that request is tolerated.
        # The comparison with True is kept as written because the return
        # type of validarFormato is not visible here.
        if self.voleano == True:
            self.zLibraty.set_page_load_timeout(12)
            try:
                self.zLibraty.load_url(self.urlDowloadPDF)
            except Exception:
                sleep(5)
            self.zLibraty.set_page_load_timeout(7000)
            print("funciona PDF ")
            self.voleano = False
            sleep(5)
            self.contadorLibros += 1
            self.contadorLibros2 += 1
        else:
            self.zLibraty.set_page_load_timeout(12)
            try:
                self.zLibraty.load_url(self.urlDowloadPDF)
            except Exception:
                sleep(8)
                pyautogui.press("down")
                sleep(2)
                pyautogui.press("enter")
            self.zLibraty.set_page_load_timeout(7000)
            sleep(5)
            self.contadorLibros += 1
            self.contadorLibros2 += 1
        self.zLibraty.load_url("about:downloads")
        self.datosEsperaDescarga()
        self.peticiones()
        self.zLibraty.back()
        informaiconPdf(self.urlpdfGeleneralH)
        guardarNumeroDescargas(self.contadorLibros)

    def _cambiarUsuario(self):
        """Leave the current session and sign in with the next account."""
        if self.contadorLibros == 20:
            self.contadorCredenciales = 20
            print("saliendo de secion¡¡¡¡¡¡")
            pyautogui.moveTo(1707, 245)
            pyautogui.hotkey("ctrl", "shift", "u")
            sleep(2)
            pyautogui.press("enter")
            sleep(7)
            pyautogui.press("enter")
            sleep(15)
        else:
            print("saliendo de secion")
            self.zLibraty.get("http://zlibraryexau2g3p.onion/logout.php")
        self.contadorUsuarios += 1
        print(self.contadorUsuarios)
        try:
            self.zLibraty.switch_to_window(self.zLibraty.window_handles[0])
        except Exception:
            print("error al cambian de ventana")
        usuarioUsadosReescrivir(self.contadorUsuarios)
        print("por aqui¿¿¿¿¿¿")
        self.credenciales(self.contadorUsuarios)
        self.contadorCredenciales = 1
        print("no por aqui¿¿¿¿¿¿")
        sleep(20)
        self.iniciarSecion()
        sleep(15)
        self.paginaDescargas()
        sleep(7)
        self.contadorLibros2 = 0
        sleep(15)

    def DescargarContenido(self, _html):
        """Store ``_html`` in ``self.contenido``."""
        self.contenido = _html

    def serrarTor(self):
        """Close the current browser window."""
        self.zLibraty.close()

    def datosEsperaDescarga(self):
        """Wait four seconds and snapshot the page into ``self.htmlValidador``."""
        sleep(4)
        self.htmlValidador = self.zLibraty.page_source

    def validarDescarga(self):
        """Inspect the download page for a canceled or failed download.

        When the first ``value=`` text of the download description contains
        "Canceled" or "Failed", a fixed screen position is clicked.

        Raises:
            AttributeError: if the page contains no ``value=`` text in a
                matching ``description`` element.
        """
        self.htmlFalce = self.zLibraty.page_source
        self.soupFalce = BeautifulSoup(self.htmlFalce, "html.parser")
        self.validarfalce = self.soupFalce.find_all(
            "description", class_="downloadDetails downloadDetailsNormal")
        self.respuestafalce = re.search("value=.+", str(self.validarfalce))
        self.buscarFalse = self.respuestafalce.group()
        if re.search("Canceled", self.buscarFalse):
            print("se daño al descarga =(")
            sleep(5)
            pyautogui.click(1393, 139)
            sleep(5)
        elif re.search("Failed", self.buscarFalse):
            print("se daño al descarga pero vamos a solucionarlo =( ")
            sleep(5)
            pyautogui.click(1393, 139)
            sleep(5)
        else:
            print("la descarga va bien =)")

    def peticiones(self):
        """Poll the download page until the progress value reaches 100.

        Each failed attempt to read the progress is counted; on the eighth
        failure the files in the hard-coded download folder are removed and
        the exception is re-raised.
        """
        self.validarDescarga()
        self.carga = 0
        self.daño = 0
        self.conteo = 0
        while self.carga < 100:
            self.soup = BeautifulSoup(self.htmlValidador, "html.parser")
            try:
                self.archivoDescarga = self.soup.find_all("progress", class_="downloadProgress")
                self.respaldo = re.split("value", str(self.archivoDescarga))
                self.tiempo = re.search("[0-9]+", self.respaldo[1])
                print(self.tiempo.group())
                self.carga = int(self.tiempo.group())
                self.datosEsperaDescarga()
                sleep(3)
                self.validarDescarga()
                # NOTE(review): self.conteo is never incremented in this
                # class, so this branch does not run as written.
                if self.conteo == 3:
                    pyautogui.press("enter")
                    self.conteo = 0
            except Exception:
                # NOTE(review): the snapshot in self.htmlValidador is not
                # refreshed here, so a retry parses the same page again.
                print("o no ,se daño la descargar y no la e podido volver a iniciar")
                if self.daño == 7:
                    os.system('rm -r /home/dd/zlibros/libros1920-1921/libro/*.*')
                    raise
                self.daño += 1
                sleep(5)
class TruliaHelper():
    """Search trulia.com through Tor Browser and export image/price to CSV."""

    def __init__(self):
        self.url = 'https://www.trulia.com'
        # need to set Tor Browser path here.
        tbpath = "/home/gc14/Documents/softwares/tor-browser_en-US"
        self.driver = TorBrowserDriver(tbb_path=tbpath, tbb_logfile_path='test.log')

    # method to get items from given link.
    def getItems(self):
        """Run each keyword search and collect one detail dict per search.

        Returns:
            list[dict]: one entry per keyword, as returned by
            ``getItemDetail`` (an empty dict when extraction failed).
        """
        items = []
        # Other searches that were tried: 'New York, NY',
        # 'San Francisco, CA', 'Washington, CA'.
        keywords = ['512 W 10th St Perris CA 92570'] * 2
        try:
            for keyword in keywords:
                self.driver.get(self.url)
                search_box = self.driver.find_element_by_id(
                    "homepageSearchBoxTextInput")
                search_box.clear()
                search_box.send_keys(keyword)
                search_btn = self.driver.find_element_by_xpath(
                    "//button[@data-auto-test-id='searchButton']")
                if search_btn:
                    print("Going to click")
                    search_btn.click()
                # Fixed wait before the result page is parsed.
                time.sleep(10)
                items.append(self.getItemDetail())
        finally:
            # Close the browser window even when a search step raises.
            self.driver.close()
        return items

    def getItemDetail(self):
        """Extract the image URL and price from the current page.

        Returns:
            dict: ``{"image": ..., "price": ...}``, or an empty dict when
            the page could not be read or the expected elements are
            missing (the reason is printed).
        """
        data = {}
        try:
            soup = BeautifulSoup(self.driver.page_source, u'html.parser')
            image = soup.find("div", attrs={
                "class": "Tiles__TileBackground-fk0fs3-0 cSObNX"
            }).find("img")["src"]
            price = soup.find(
                "div",
                attrs={
                    "class":
                    "Text__TextBase-sc-1cait9d-0-div Text__TextContainerBase-sc-1cait9d-1 hlvKRM"
                }).text
            data.update({"image": image, "price": price})
        except Exception as exc:
            # Best effort: keep going with an empty result, but report why
            # instead of discarding the error silently.
            print("Could not extract item details:", repr(exc))
        return data

    # method to write csv file
    def writeCSVFile(self, data, path=None):
        """Write the collected items to a CSV file with Image/Price columns.

        Empty dicts (failed extractions) are skipped so that one failed
        search no longer aborts the export; a missing single field is
        written as an empty cell. Errors while writing are printed, not
        raised.

        Args:
            data: Iterable of dicts with ``image`` and ``price`` keys.
            path: Optional output path. Defaults to the fixed path used by
                this project.
        """
        if path is None:
            path = '/home/gc14/Documents/fiverr/custom_scrapers/home/trulia.csv'
        try:
            # newline='' is what the csv module expects for files it writes.
            with open(path, mode='w', newline='') as csv_file:
                fieldnames = ['Image', 'Price']
                writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
                writer.writeheader()
                for d in data:
                    if not d:
                        continue
                    writer.writerow({
                        'Image': d.get('image', ''),
                        'Price': d.get('price', ''),
                    })
            print("File written successfully.")
        except Exception:
            print(sys.exc_info())

    # method to start process.
    def start(self):
        """Collect the items and write them to the CSV file if any exist."""
        items = self.getItems()
        print("Items : ", len(items))
        if items:
            self.writeCSVFile(items)