class TruliaHelper():
    """Look up property addresses on Trulia through a Tor Browser session.

    Addresses are read from an Excel sheet, typed one at a time into the
    Trulia home page search box, and the price shown on the resulting page
    is scraped with BeautifulSoup.
    """

    # Class string of the <div> that holds the price on the result page.
    # It is a generated CSS-in-JS name taken from the site markup, so it may
    # need updating when the site is redeployed.
    PRICE_CLASS = ("Text__TextBase-sc-1cait9d-0-div "
                   "Text__TextContainerBase-sc-1cait9d-1 hlvKRM")

    # Spreadsheet columns that together make up one searchable address.
    ADDRESS_COLUMNS = ['Site Address', 'Site City', 'Site State', 'Site Zip']

    def __init__(self, tbpath="/home/XX/XXXX/tor-browser-linux64-8.0.8_en-US/tor-browser_en-US"):
        """Start the Tor Browser driver.

        Args:
            tbpath: Directory of the Tor Browser bundle to launch.
        """
        self.url = 'https://www.trulia.com'
        self.driver = TorBrowserDriver(tbb_path=tbpath, tbb_logfile_path='test.log')

    @staticmethod
    def _format_address(parts):
        """Join address components into a single search string.

        ``None``, NaN and blank components are skipped, and integer-valued
        floats (how pandas stores a zip column that has gaps) are rendered
        without the trailing ``.0``.
        """
        cleaned = []
        for part in parts:
            if part is None:
                continue
            if isinstance(part, float):
                if part != part:  # NaN is the only float not equal to itself
                    continue
                if part.is_integer():
                    part = int(part)
            text = str(part).strip()
            if text:
                cleaned.append(text)
        return " ".join(cleaned)

    # method to get items from given link.
    def getItems(self, excel_path="/home/XXXXX/XXXXX/XXXXXX.xlsx"):
        """Search Trulia for every address in the spreadsheet.

        Args:
            excel_path: Excel file containing the ``ADDRESS_COLUMNS`` columns.

        Returns:
            A list with one ``getItemDetail()`` dict per searched address.

        The browser window is closed when the loop ends, including when a
        lookup raises (for example a missing search box).
        """
        df = pd.read_excel(excel_path)
        rows = df[self.ADDRESS_COLUMNS].values.tolist()
        items = []
        try:
            for row in rows:
                # Build a plain "street city state zip" string; str(row)
                # would type the Python list repr into the search box.
                keyword = self._format_address(row)
                if not keyword:
                    continue
                self.driver.get(self.url)
                search_box = self.driver.find_element_by_id("homepageSearchBoxTextInput")
                search_box.clear()
                search_box.send_keys(keyword)
                # find_element_* raises when nothing matches, so the button
                # needs no truthiness check before clicking.
                search_btn = self.driver.find_element_by_xpath(
                    "//button[@data-auto-test-id='searchButton']")
                search_btn.click()
                # Fixed pause to let the result page render.
                time.sleep(10)
                items.append(self.getItemDetail())
        finally:
            self.driver.close()
        return items

    def getItemDetail(self):
        """Scrape the price from the page currently loaded in the driver.

        Returns:
            ``{"price": <text>}`` when the price element is found, otherwise
            an empty dict. Failures are reported on stdout instead of raised
            so one bad page does not stop the whole run.
        """
        data = {}
        try:
            soup = BeautifulSoup(self.driver.page_source, u'html.parser')
            price_el = soup.find("div", attrs={"class": self.PRICE_CLASS})
        except Exception as exc:  # best effort: report and carry on
            print("Could not read result page:", exc)
            return data
        if price_el is None:
            return data
        price = price_el.text
        print(price)
        data["price"] = price
        return data

    # method to start process.
    def start(self):
        """Run the full lookup and print the collected items."""
        items = self.getItems()
        print("Items : ", items)
Esempio n. 2
0
class crearCorros:
    """Fill in a web registration form through Tor Browser from text files.

    On construction the class reads user names and passwords from text
    files, cleans every row, and starts a Tor Browser driver. The other
    methods load the form, type one row into it, and close the browser.
    """

    # Removes combining diacritics from NFD-decomposed text while keeping
    # the tilde that turns "n" into "ñ".
    _DIACRITICOS = re.compile(
        r"([^n\u0300-\u036f]|n(?!\u0303(?![\u0300-\u036f])))[\u0300-\u036f]+",
        re.I)

    # Number of rows read from each input file.
    _TOTAL_FILAS = 101

    def __init__(self):
        """Load and clean the input rows, then launch the browser.

        The files are read before the browser starts so that a missing
        file does not leave an orphaned browser process behind.
        """
        self.urlProtocoe = 'http://3g2upl4pq6kufc4m.onion', 'https://mail.protonmail.com/create/new', 'https://singlelogin.org/registration.php'
        print(self.urlProtocoe[2])
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.dirNombre = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/nombre.txt'
        self.dirapellido = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/apellidos.txt'
        self.dirContrasenna = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña.txt'
        self.dirCotrasenna2 = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña2.txt'

        nombres = self._leer_lineas(self.dirNombre, self._TOTAL_FILAS)
        claves = self._leer_lineas(self.dirContrasenna, self._TOTAL_FILAS)
        claves2 = self._leer_lineas(self.dirCotrasenna2, self._TOTAL_FILAS)

        # Every row is cleaned, including the last one, so no entry keeps a
        # raw newline.
        self.lista = [
            self._limpiar(nombre + 'asdsdf', 'asdaawderca')
            for nombre in nombres
        ]
        self.datosContrasenna = [
            self._limpiar(clave + clave2, 'radabanals')
            for clave, clave2 in zip(claves, claves2)
        ]

        self.protocoe = TorBrowserDriver(self.tbb_dir,
                                         tbb_logfile_path='test.log')

    @staticmethod
    def _leer_lineas(ruta, cantidad):
        """Return the first ``cantidad`` lines of ``ruta``.

        Lines keep their trailing newline. When the file is shorter than
        ``cantidad`` the result is padded with empty strings so row indexes
        stay aligned across files. The file is closed before returning.
        """
        with open(ruta, 'r') as archivo:
            return [archivo.readline() for _ in range(cantidad)]

    @classmethod
    def _limpiar(cls, texto, relleno):
        """Replace newlines with ``relleno`` and strip accents (keeping ñ)."""
        texto = texto.replace('\n', relleno)
        texto = cls._DIACRITICOS.sub(r"\1", normalize("NFD", texto))
        return normalize('NFC', texto)

    def iniciarTor(self):
        """Load the registration page in the browser."""
        self.protocoe.load_url(self.urlProtocoe[2])

    def ingresarDatos(self, fila):
        """Type row ``fila`` into the form and submit it.

        Args:
            fila: Index into ``self.lista`` / ``self.datosContrasenna``.

        Raises whatever the driver raises when an element is missing.
        """
        # Password and its confirmation field, with random pauses between
        # the actions.
        self.pasword = self.protocoe.find_element_by_name("password")
        self.pasword.click()
        sleep(random.uniform(1.0, 4))
        self.pasword.send_keys(self.datosContrasenna[fila])
        self.pasword = self.protocoe.find_element_by_name("passwordc")
        sleep(random.uniform(1.0, 3))
        self.pasword.click()
        self.pasword.send_keys(self.datosContrasenna[fila])
        sleep(random.uniform(2.0, 5.7))
        # The user-name input is looked up inside the first <iframe>.
        self.iframes = self.protocoe.find_element_by_tag_name("iframe")
        self.protocoe.switch_to.frame(self.iframes)
        self.usuario = self.protocoe.find_element_by_xpath('//input')
        self.usuario.click()
        self.usuario.send_keys(self.lista[fila])
        sleep(random.uniform(0, 5))
        self.usuario.send_keys(Keys.ENTER)
        self.protocoe.switch_to.default_content()
        # Fixed wait before looking for the confirmation button.
        sleep(20)
        self.enter = self.protocoe.find_element_by_xpath(
            '//button[@class="pm_button primary modal-footer-button"]')
        self.enter.click()

    def serrarTor(self):
        """Close the browser window."""
        self.protocoe.close()

    def imprimirDatos(self):
        """Print the first 100 cleaned user names and passwords."""
        for nombre, clave in zip(self.lista[:100], self.datosContrasenna[:100]):
            print(nombre)
            print(clave)
Esempio n. 3
0
        # NOTE(review): this is the interior of a function whose header is not
        # visible here; `cursor`, `sql` and `driver` come from the enclosing scope.
        # Run the query and take the 'term' field of the first returned row.
        cursor.execute(sql)
        result = cursor.fetchall()
        originalTerm = result[0]['term']
        print(originalTerm)
        # First attempt uses the term with hyphens turned into spaces.
        term = originalTerm.replace("-", " ")

        # Element lookups below wait up to 10 seconds before failing.
        driver.implicitly_wait(10)

        driver.get(
            "https://www.udemy.com/instructor/marketplace-insights/?q=" +
            term + "&lang=en")
        # Random 5-8 second pause after loading the page.
        sleep(random.randint(5, 8))

        try:
            demandEl = driver.find_element_by_xpath(
                '//div[contains(@class,"panel-body")]/div[contains(@class,"course-label-metrics-opportunity")]/div[1]/div/div[2]'
            )
            print(demandEl.text)
        except NoSuchElementException:
            # Element not found for the spaced term: retry once with the
            # spaces turned back into hyphens.
            print("trying hyphenated...")
            term = term.replace(" ", "-")
            driver.get(
                "https://www.udemy.com/instructor/marketplace-insights/?q=" +
                term + "&lang=en")
            sleep(random.randint(5, 8))
            # This second lookup is not guarded; a missing element here
            # propagates NoSuchElementException to the caller.
            demandEl = driver.find_element_by_xpath(
                '//div[contains(@class,"panel-body")]/div[contains(@class,"course-label-metrics-opportunity")]/div[1]/div/div[2]'
            )
            print(demandEl.text)

        # Text of whichever element lookup succeeded.
        demandStr = demandEl.text
Esempio n. 4
0
class crearCorros:
    """Log in through Tor Browser with e-mail/password rows from text files.

    On construction the class reads names and passwords from text files,
    cleans every row, turns each name into a ``@maildrop.cc`` address, and
    starts a Tor Browser driver. ``ingresarDatos`` then logs in with one row
    and clicks the "resend confirmation e-mail" link on the profile page.
    """

    # Removes combining diacritics from NFD-decomposed text while keeping
    # the tilde that turns "n" into "ñ".
    _DIACRITICOS = re.compile(
        r"([^n\u0300-\u036f]|n(?!\u0303(?![\u0300-\u036f])))[\u0300-\u036f]+",
        re.I)

    # Number of rows read from each input file.
    _TOTAL_FILAS = 101

    def __init__(self):
        """Load and clean the input rows, then launch the browser.

        The files are read before the browser starts so that a missing
        file does not leave an orphaned browser process behind.
        """
        self.urlProtocoe = 'http://3g2upl4pq6kufc4m.onion', 'https://mail.protonmail.com/create/new', 'https://singlelogin.org/registration.php', 'https://singlelogin.org/?logoutAll'
        print(self.urlProtocoe[2])
        self.tbb_dir = "/usr/local/share/tor-browser_en-US"
        self.dirNombre = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/nombre.txt'
        self.dirapellido = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/apellidos.txt'
        self.dirContrasenna = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña.txt'
        self.dirCotrasenna2 = '/home/dgc7/ejersiciosLibros/pyaton/ejemplos/scrapin/zlibrari/crearCorreos/contraseña2.txt'

        nombres = self._leer_lineas(self.dirNombre, self._TOTAL_FILAS)
        claves = self._leer_lineas(self.dirContrasenna, self._TOTAL_FILAS)

        # Every row is cleaned and suffixed, including the last one, so no
        # entry keeps a raw newline or lacks the mail domain.
        self.lista = [
            self._limpiar(nombre + 'asdsdf', 'asdaawderca') + '@maildrop.cc'
            for nombre in nombres
        ]
        self.datosContrasenna = [
            self._limpiar(clave + "blabal", 'radabanals')
            for clave in claves
        ]

        self.protocoe = TorBrowserDriver(self.tbb_dir,
                                         tbb_logfile_path='test.log')

    @staticmethod
    def _leer_lineas(ruta, cantidad):
        """Return the first ``cantidad`` lines of ``ruta``.

        Lines keep their trailing newline. When the file is shorter than
        ``cantidad`` the result is padded with empty strings so row indexes
        stay aligned across files. The file is closed before returning.
        """
        with open(ruta, 'r') as archivo:
            return [archivo.readline() for _ in range(cantidad)]

    @classmethod
    def _limpiar(cls, texto, relleno):
        """Replace newlines with ``relleno`` and strip accents (keeping ñ)."""
        texto = texto.replace('\n', relleno)
        texto = cls._DIACRITICOS.sub(r"\1", normalize("NFD", texto))
        return normalize('NFC', texto)

    def iniciarTor(self):
        """Load the logout-all URL, the starting page for a login."""
        self.protocoe.load_url(self.urlProtocoe[3])

    def ingresarDatos(self, fila):
        """Log in with row ``fila`` and request a new confirmation e-mail.

        Args:
            fila: Index into ``self.lista`` / ``self.datosContrasenna``.

        Raises whatever the driver raises when an element or the alert is
        missing.
        """
        self.eamil = self.protocoe.find_element_by_name('email')
        self.eamil.click()
        sleep(random.uniform(1.0, 4))
        self.eamil.send_keys(self.lista[fila])
        self.pasword = self.protocoe.find_element_by_name("password")
        self.pasword.click()
        sleep(random.uniform(1.0, 5))
        self.pasword.send_keys(self.datosContrasenna[fila])
        sleep(random.uniform(2.0, 4.7))
        self.pasword.send_keys(Keys.RETURN)
        # Fixed wait after submitting the login form.
        sleep(20)
        self.protocoe.load_url('https://b-ok.cc/profile.php')
        self.resen = self.protocoe.find_element_by_xpath(
            '//a[@id="resendConfEmail"]')
        self.resen.click()
        sleep(4)
        # `switch_to.alert` replaces the deprecated `switch_to_alert()`.
        self.alerta = self.protocoe.switch_to.alert
        print(self.alerta.text)
        self.alerta.accept()

    def serrarTor(self):
        """Close the browser window."""
        self.protocoe.close()

    def imprimirDatos(self):
        """Print the first 100 generated addresses and passwords."""
        for correo, clave in zip(self.lista[:100], self.datosContrasenna[:100]):
            print(correo)
            print(clave)
Esempio n. 5
0
class TruliaHelper():
    """Search Trulia through Tor Browser and export image/price to CSV."""

    # Class strings of the <div>s holding the listing image and the price.
    # They are generated CSS-in-JS names taken from the site markup, so they
    # may need updating when the site is redeployed.
    IMAGE_CLASS = "Tiles__TileBackground-fk0fs3-0 cSObNX"
    PRICE_CLASS = ("Text__TextBase-sc-1cait9d-0-div "
                   "Text__TextContainerBase-sc-1cait9d-1 hlvKRM")

    def __init__(self, tbpath="/home/gc14/Documents/softwares/tor-browser_en-US"):
        """Start the Tor Browser driver.

        Args:
            tbpath: Directory of the Tor Browser bundle to launch.
        """
        self.url = 'https://www.trulia.com'
        self.driver = TorBrowserDriver(tbb_path=tbpath,
                                       tbb_logfile_path='test.log')

    # method to get items from given link.
    def getItems(self, keywords=None):
        """Search Trulia for each keyword and collect the scraped details.

        Args:
            keywords: Search strings to look up. Defaults to the sample
                address repeated twice.

        Returns:
            A list with one ``getItemDetail()`` dict per keyword.

        The browser window is closed when the loop ends, including when a
        lookup raises (for example a missing search box).
        """
        if keywords is None:
            keywords = ['512 W 10th St Perris CA 92570'] * 2
        items = []
        try:
            for keyword in keywords:
                self.driver.get(self.url)
                search_box = self.driver.find_element_by_id(
                    "homepageSearchBoxTextInput")
                search_box.clear()
                search_box.send_keys(keyword)
                # find_element_* raises when nothing matches, so the button
                # needs no truthiness check before clicking.
                search_btn = self.driver.find_element_by_xpath(
                    "//button[@data-auto-test-id='searchButton']")
                print("Going to click")
                search_btn.click()
                # Fixed pause to let the result page render.
                time.sleep(10)
                items.append(self.getItemDetail())
        finally:
            self.driver.close()
        return items

    def getItemDetail(self):
        """Scrape image URL and price from the currently loaded page.

        Returns:
            ``{"image": <src>, "price": <text>}`` when both are found,
            otherwise an empty dict. Failures are reported on stdout instead
            of raised so one bad page does not stop the whole run.
        """
        data = {}
        try:
            soup = BeautifulSoup(self.driver.page_source, u'html.parser')
            tile = soup.find("div", attrs={"class": self.IMAGE_CLASS})
            img = tile.find("img") if tile is not None else None
            image = img.get("src") if img is not None else None
            price_el = soup.find("div", attrs={"class": self.PRICE_CLASS})
        except Exception as exc:  # best effort: report and carry on
            print("Could not read result page:", exc)
            return data
        if image is None or price_el is None:
            return data
        data.update({"image": image, "price": price_el.text})
        return data

    # method to write csv file
    def writeCSVFile(self, data, path='/home/gc14/Documents/fiverr/custom_scrapers/home/trulia.csv'):
        """Write the scraped items to a CSV file with Image/Price columns.

        Args:
            data: Iterable of dicts as returned by ``getItemDetail``. Empty
                dicts (pages where nothing was found) are skipped.
            path: Destination file; it is overwritten.

        I/O errors are printed rather than raised.
        """
        fieldnames = ['Image', 'Price']
        try:
            # newline='' lets the csv module control line endings itself.
            with open(path, mode='w', newline='') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
                writer.writeheader()
                for d in data:
                    if not d:
                        continue
                    writer.writerow({
                        'Image': d.get('image', ''),
                        'Price': d.get('price', ''),
                    })
        except OSError:
            print(sys.exc_info())
            return
        print("File written successfully.")

    # method to start process.
    def start(self):
        """Run the search and export whatever was collected."""
        items = self.getItems()
        print("Items : ", len(items))
        if items:
            self.writeCSVFile(items)