Esempio n. 1
0
def login():  # GG CAPTCHA (abandoned ship)
    """Walk through the site's e-mail login dialog using USERNAME / PASSWORD.

    Opens the login entry in the header navigation, waits, dismisses up to two
    intermediate buttons when they are shown, fills in the credentials,
    submits the form and finally clicks the reCAPTCHA anchor.
    """
    nav_login_entry = '//header[@role="banner"]/div/div/div[3]/div/div/nav/ul/li[6]'
    # Anti RPA by AirBnB: these intermediate buttons only show up sometimes,
    # so each one is clicked only when it is actually present.
    optional_buttons = (
        '//div[@aria-label="Log in"]/div[2]/div[4]/button',
        '//button[@data-testid="social-auth-button-email"]',
    )

    r.click(nav_login_entry)
    r.wait(10)

    for button_xpath in optional_buttons:
        if r.present(button_xpath):
            r.click(button_xpath)

    r.type('//*[@id="email"]', USERNAME)
    r.type('//*[@id="password"]', PASSWORD)
    r.click('//button[@data-veloute="submit-btn-cypress"]')
    r.click('//*[@id="recaptcha-anchor"]/div[1]')
Esempio n. 2
0
def click(xpath, s):
    """Click the element at *xpath* and advance a step counter.

    Args:
        xpath: Identifier of the element to click.
        s: Current step/state number of the caller.

    Returns:
        ``s + 1`` when the element was found and clicked, otherwise ``s``
        unchanged (a message is printed in that case).
    """
    # Logical `and` (instead of bitwise `&`) short-circuits: when the element
    # does not exist, the second lookup is skipped.
    if r.exist(xpath) and r.present(xpath):
        r.click(xpath)
        return s + 1

    # The original message glued the XPath directly onto "find"; add the space.
    print(f"Couldn't find {xpath} component")
    return s
Esempio n. 3
0
def close_cookie_popup():
    """Click the cookie banner's accept button if it is currently present."""
    accept_button = "//button[@class='optanon-allow-all accept-cookies-button']"
    if not r.present(accept_button):
        return
    r.click(accept_button)
Esempio n. 4
0
def extract_stay_info_as_data():
    """Collect text and photo metadata for up to five listings into a dict.

    Generates URLs/text in a dict (instead of downloading images) to shorten
    upload/download time and keep the output unified.

    Listing URLs come from ``get_stay_url()``. Each URL is opened in the
    browser; a page that lacks the expected title element (called a "whale"
    in the original comments - Airbnb Plus pages use a different format) is
    skipped and the next URL is tried, so result slots are only filled by
    pages matching the expected layout.

    Returns:
        dict: keys ``"0"`` .. ``"4"``. Each value is a dict with the string
        fields ``name``, ``description``, ``inventory``, ``price``,
        ``rating`` and ``url`` plus the lists ``picurl`` and ``pictext``
        (length 10, ``None`` where no photo URL / caption was captured).
        If the URL list runs out before five listings were extracted, the
        remaining slots keep their empty defaults and a message is printed.
    """
    max_stays = 5
    max_photos = 10

    title_xpath = '//*[@data-plugin-in-point-id="TITLE_DEFAULT"]/div/div/section/div/div/h1'
    description_xpath = '//*[@data-plugin-in-point-id="OVERVIEW_DEFAULT"]/div/div/div/section/div/div/div/div/div'
    inventory_xpath = '//*[@data-plugin-in-point-id="OVERVIEW_DEFAULT"]/div/div/div/section/div/div/div/div/div[2]'
    price_xpath = '//*[@data-plugin-in-point-id="BOOK_IT_SIDEBAR"]/div/div[2]/div/ul[2]/li/span[2]'  # Total price
    rating_xpath = '//*[@data-plugin-in-point-id="REVIEWS_DEFAULT"]/div/div/section/div/div/div/h2/span[2]/span'
    photo_src_xpath = '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img/@src'
    photo_text_xpath = '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div[2]/div/span/div/span'
    next_xpath = '//*[@aria-label="Next"]'

    # One independent record per slot (fresh lists for every slot).
    data = {
        str(slot): {
            "name": "",
            "description": "",
            "inventory": "",
            "price": "",
            "rating": "",
            "picurl": [None] * max_photos,
            "pictext": [None] * max_photos,
            "url": "",
        }
        for slot in range(max_stays)
    }

    print('Extracting Top 5 Stay Picture Information (10 Image Max)..')
    slot = 0
    # Iterating the URL list directly (instead of indexing url[i + k]) means
    # that running out of URLs ends the loop rather than raising IndexError.
    for stay_url in get_stay_url():
        r.url(stay_url)
        print(f'Extracting Text Data - Homestay {slot+1}')

        if not r.exist(title_xpath):
            # Detects whales (Airbnb Plus spoils the format a lot): skip this
            # URL and try the next one for the same slot.
            print("WHALE detected, adding one more loop..")
            continue

        stay = data[str(slot)]
        stay["url"] = stay_url
        stay["name"] = r.read(title_xpath)
        # Replace non-breaking spaces with plain spaces.
        stay["description"] = r.read(description_xpath).replace("\xa0", " ")
        stay["inventory"] = r.read(inventory_xpath)
        stay["price"] = r.read(price_xpath)
        if r.present(rating_xpath):
            stay["rating"] = r.read(rating_xpath)
        else:
            stay["rating"] = "No Reviews Yet"

        # Open the photo viewer, then step through at most max_photos photos.
        r.click('//*[@id="FMP-target"]')
        for photo_no in range(1, max_photos + 1):
            print(f'Extracting Picture Data - Homestay {slot+1} Photo {photo_no}')
            r.wait(0.4)
            # r.snap() on the <img> was noted as the fastest option but "not
            # perfect", so the image URL is recorded instead.
            if r.exist(photo_src_xpath):
                stay["picurl"][photo_no - 1] = r.read(photo_src_xpath)
                if r.present(photo_text_xpath):
                    stay["pictext"][photo_no - 1] = r.read(photo_text_xpath)
                print(f'Homestay {slot+1} Photo {photo_no} extracted!')

            # Check the photo cap first so no lookup of the "Next" control is
            # made once the last allowed photo has been handled.
            if photo_no >= max_photos or not r.exist(next_xpath):
                break
            r.click(next_xpath)

        slot += 1
        if slot >= max_stays:
            break

    if slot < max_stays:
        print(f'Only {slot} of {max_stays} homestays extracted - ran out of URLs.')
    print('Done.')

    return data
Esempio n. 5
0
def extract_stay_info_as_data():
    """Collect text, coordinates and photo metadata for up to five listings.

    Generates URLs/text in a dict (instead of downloading images) to shorten
    upload/download time and keep the output unified.

    Listing URLs come from ``get_stay_url()``. Each URL is opened in the
    browser; a page that lacks the expected name element (called a "whale"
    in the original comments - Airbnb Plus pages use a different format) is
    skipped and the next URL is tried, so result slots are only filled by
    pages matching the expected layout.

    Returns:
        dict: keys ``"0"`` .. ``"4"``. Each value is a dict with the string
        fields ``name``, ``description``, ``inventory``, ``price``,
        ``rating``, ``url`` and ``coordinates`` plus the lists ``picurl``
        and ``pictext`` (length 10, ``None`` where no photo URL / caption
        was captured). If the URL list runs out before five listings were
        extracted, the remaining slots keep their empty defaults and a
        message is printed.

    Raises:
        IndexError: if the map link contains no ``=`` or the price text
            contains no ``Total`` (the split results are indexed directly).
    """
    max_stays = 5
    max_photos = 10

    name_xpath = '//*[@itemprop="name"]/span/h1/span'
    map_link_xpath = '//*[@data-veloute="map/GoogleMap"]/div/div/div/div[2]/a/@href'
    description_xpath = '//*[@href="#neighborhood"]/div'
    inventory_prefix = '//*[@id="room"]/div[2]/div/div[2]/div/div/div[3]/div/div/div[1]/div/div'
    price_xpath = '//*[@id="book_it_form"]/div[4]/div[2]'
    price_fallback_xpath = '//*[@id="book_it_form"]/div[2]'
    review_header_xpath = '//*[@data-heading-focus="review header"]/div'
    photo_src_xpath = '//img[@data-veloute="slideshow-image"]/@src'
    photo_text_xpath = '//*[@data-veloute="slideshow-modal"]/div/div/div[2]/div[2]/div[2]/div[2]/div'
    next_xpath = '//button[@aria-label="Next"]'

    # One independent record per slot (fresh lists for every slot).
    data = {
        str(slot): {
            "name": "",
            "description": "",
            "inventory": "",
            "price": "",
            "rating": "",
            "picurl": [None] * max_photos,
            "pictext": [None] * max_photos,
            "url": "",
            "coordinates": "",
        }
        for slot in range(max_stays)
    }

    print('Extracting Top 5 Stay Picture Information (10 Image Max)..')
    slot = 0
    # Iterating the URL list directly (instead of indexing url[i + k]) means
    # that running out of URLs ends the loop rather than raising IndexError.
    for stay_url in get_stay_url():
        r.url(stay_url)
        print(f'Extracting Text Data - Homestay {slot+1}')

        if not r.exist(name_xpath):
            # Detects whales (Airbnb Plus spoils the format a lot): skip this
            # URL and try the next one for the same slot.
            print("WHALE detected, adding one more loop..")
            continue

        stay = data[str(slot)]
        stay["url"] = stay_url

        # Keep the text between the first "=" of the map link and the next "&".
        map_link = r.read(map_link_xpath)
        stay["coordinates"] = map_link.split("=", 1)[1].split("&", 1)[0]

        stay["name"] = r.read(name_xpath)
        stay["description"] = r.read(description_xpath)

        # Four sibling cells (div[1] .. div[4]) joined with single spaces.
        stay["inventory"] = " ".join(
            r.read(f'{inventory_prefix}/div[{cell}]/div') for cell in range(1, 5)
        )

        # The total sits in one of two places in the booking form; either
        # way keep only what follows the first "Total".
        if r.present(price_xpath):
            price_text = r.read(price_xpath)
        else:
            price_text = r.read(price_fallback_xpath)
        stay["price"] = price_text.split("Total", 1)[1]  # Total price

        if r.present(review_header_xpath):
            stay["rating"] = (
                r.read(review_header_xpath + '/div/@aria-label')
                + " ("
                + r.read(review_header_xpath + '/span')
                + ")"
            )
        else:
            stay["rating"] = "No Reviews Yet"

        # Open the photo viewer, then step through at most max_photos photos.
        r.click('//*[@data-veloute="hero-view-photos-button"]')
        for photo_no in range(1, max_photos + 1):
            print(f'Extracting Picture Data - Homestay {slot+1} Photo {photo_no}')
            r.wait(0.4)
            # r.snap() on the <img> was noted as the fastest option but "not
            # perfect", so the image URL is recorded instead.
            if r.exist(photo_src_xpath):
                stay["picurl"][photo_no - 1] = r.read(photo_src_xpath)
                if r.present(photo_text_xpath):
                    stay["pictext"][photo_no - 1] = r.read(photo_text_xpath)
                print(f'Homestay {slot+1} Photo {photo_no} extracted!')

            # Check the photo cap first so no lookup of the "Next" control is
            # made once the last allowed photo has been handled.
            if photo_no >= max_photos or not r.exist(next_xpath):
                break
            r.click(next_xpath)

        slot += 1
        if slot >= max_stays:
            break

    if slot < max_stays:
        print(f'Only {slot} of {max_stays} homestays extracted - ran out of URLs.')
    print('Done.')

    return data
Esempio n. 6
0
# XPath pieces shared by every lookup on the Yelp search-results page.
yelp_results_root = '//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]'
yelp_page_counter = yelp_results_root + '/div[1]/div[2]/span'
yelp_pager_links = '(' + yelp_results_root + '/div[1]/div[1]/div/div/span/a/span)'

# For each area: open its search page, read the page count, then walk the pages.
for a in range(size):

    URL = f'https://www.yelp.com/search?find_desc=Restaurants&find_loc=Singapore&l=p%3ASG-SG%3ASingapore%3A%3A{area_list[a]}'
    r.url(URL)
    time.sleep(10)

    URL_list = []
    # The counter text is turned into a number by stripping the "1 of " prefix.
    page_counter_text = r.read(yelp_page_counter)
    maxpage = int(page_counter_text.replace("1 of ", ""))

    for j in range(maxpage):

        # From the second page on, advance via the pager: use the second
        # pager link when it is present, otherwise the first one.
        if j > 0:
            if r.present(yelp_pager_links + '[2]'):
                pager_target = yelp_pager_links + '[2]'
            else:
                pager_target = yelp_pager_links + '[1]'
            r.click(pager_target)

        time.sleep(10)
        # Probe result entries 1..30; stop at the first missing entry.
        for i in range(1, 31):
            result_link = (
                yelp_results_root
                + f'/ul/li[{i}]/div/div/div/div[2]/div[1]/div/div[1]/div/div[1]/div/div/h4/span/a/@href'
            )
            if not r.exist(result_link):
                break
Esempio n. 7
0
import rpa as r
import pandas as pd
import time

#Read all restaurant names in csv
yelp = pd.read_csv("yelpindex.csv")
# Preview the column; the original bare `.head` attribute access did nothing.
print(yelp["URL"].head())

r.init()

# Link of the first result container, and the second one used as a fallback.
first_result_link = "//*[@id='rso']/div[1]/div/div[1]/a/@href"
second_result_link = "//*[@id='rso']/div[2]/div/div[1]/a/@href"

#Fill up table with URLs
# Visit every row. The original `for i in (0, len(...))` looped over a
# two-element tuple: only row 0 was processed and the second value was one
# past the last row.
for i in range(len(yelp["URL"])):
    r.url(yelp.at[i, "URL"])
    if r.present(first_result_link):
        name = r.read(first_result_link)
    else:
        name = r.read(second_result_link)
    print(name)
    # Write through .at so the update lands in `yelp` itself rather than
    # relying on chained assignment (yelp["URL"][i] = ...).
    yelp.at[i, "URL"] = name
    time.sleep(5)

    # Checkpoint after every row so progress survives an interrupted run.
    yelp.to_csv("yelpindex_updated.csv")

# Final write (also covers an empty input), then reload once instead of on
# every iteration.
yelp.to_csv("yelpindex_updated.csv")

yelp2TA = pd.read_csv("yelpindex_updated.csv")
# reset_index returns a new frame; keep the result instead of discarding it.
yelp2TA = yelp2TA.reset_index(drop=True)
print(yelp2TA["URL"].head())

# State for the review-scraping stage
author_loc = ""
reviews_df = pd.DataFrame()
#if y == 9: init = 33
import pickle

# Collect restaurant URLs from the TripAdvisor Singapore listing
r.init()

URL = 'https://www.tripadvisor.com.sg/Restaurants-g294265-Singapore.html'
r.url(URL)
time.sleep(10)

# XPath stems; a positional index is appended where they are used.
ta_pager_links = '(//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/a)'
ta_listing_links = '(//*[@id="component_2"]/div/div[*]/span/div[1]/div[2]/div[1]/div/span/a/@href)'

# Page count is read from the data-page-number attribute of the sixth pager link.
maxpage = int(
    r.read('//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/div/a[6]/@data-page-number')
)

for j in range(maxpage):
    URL_list = []

    # From the second page on, advance via the pager: use the second pager
    # link when it is present, otherwise the first one.
    if j > 0:
        if r.present(ta_pager_links + '[2]'):
            r.click(ta_pager_links + '[2]')
        else:
            r.click(ta_pager_links + '[1]')

    time.sleep(10)

    # Read listing links 1..99 and stop at the first index that does not exist.
    for i in range(1, 100):
        listing_link = f'{ta_listing_links}[{i}]'
        if not r.exist(listing_link):
            break
        URL_list.append("https://www.tripadvisor.com.sg" + r.read(listing_link))
        #print(URL_list)
        # Re-pickle this page's list after every appended link.
        with open(f'url_list_{j}.txt', 'wb') as filehandle:
            pickle.dump(URL_list, filehandle)


#Load Bookmark
listnumber = 0
iteminlist = 0
Esempio n. 9
0
  def sigaaRPA(self):
    """Drive the SIGAA portal as a numbered state machine and screenshot grades.

    Runs while ``self.state > 0``; each pass handles the current state:

    1. open the portal URL
    2. type username and password (falls back to state 1 if the fields
       are not found)
    3-6. click through login / students / progress tracking / partial
       grades using the module-level ``click`` helper, whose return value
       becomes the new state
    7. select ``self.semester``
    8. click the submit control
    9. open every course link of the second ``datadisplaytable``, save a
       page screenshot under ``./notas/s<semester>/`` when the detail page
       title matches, go back, then set ``self.terminateBot``
    10. navigate back in browser history

    When ``self.terminateBot`` is set, the browser session is closed and the
    loop ends.
    """
    r.init()
    r.timeout(30)

    while self.state > 0:

      if self.terminateBot:
        r.close()
        break

      elif self.state == 1:
        # use url('your_url') to go to web page, url() returns current URL
        r.url('https://sigaa.upb.edu.co/ssomanager/c/SSB')
        self.state = self.state + 1

      elif self.state == 2:
        # use type() to use the keyboard to write something
        # Logical `and` (instead of bitwise `&`) short-circuits, so the second
        # lookup is skipped when the field does not exist.
        if r.exist(X.username) and r.present(X.username):
          # NOTE(review): credentials are hard-coded in source - they should
          # be supplied from outside the code (config/environment) instead.
          r.type(X.username, '000290164')
          r.type(X.password, 'Tandres1997_')
          self.state = self.state + 1
        else:
          print("Couldn\'t find Username and Password Components")
          self.state = 1

      elif self.state == 3:
        # use click() to click on an UI element or x, y location
        self.state = click(X.login, self.state)

      elif self.state == 4:
        # click on "Estudiantes" (students)
        self.state = click(X.estudiantes, self.state)

      elif self.state == 5:
        # click on "Seguimiento a la formación" (progress tracking)
        self.state = click(X.seguimieto, self.state)

      elif self.state == 6:
        # click on "Calificaciones parciales" (partial grades)
        self.state = click(X.calif, self.state)

      elif self.state == 7:
        # select the semester whose grades should be shown
        r.select(X.semester, self.semester)
        self.state = self.state + 1

      elif self.state == 8:
        # click the submit ("enviar") control
        r.click(X.enviar)
        self.state = self.state + 1

      elif self.state == 9:
        r.wait(2)
        numCursos = r.count('//*[@class="datadisplaytable"][2]/tbody/tr/td/a')
        # Row indices start at 2 (presumably row 1 is the table header -
        # TODO confirm against the page).
        for i in range(2, numCursos + 2):
          tablexpath = '//*[@class="datadisplaytable"][2]/tbody/tr[' + str(i) + ']/td/a'
          if r.exist(tablexpath):
            r.click(tablexpath)
            r.wait(1)
            pagetitle = r.read('//div[@id="pagetitle"]')
            # Only screenshot when the grade-detail page actually opened.
            if pagetitle == 'Detalle de Calificación de Componente':
              materia = r.read('//*[@class="datadisplaytable"][1]/tbody/tr[5]/td[2]')
              print(materia)
              r.snap('page', './notas/s' + self.semester + '/' + materia + '.png')
              # r.table('//table[@class="datadisplaytable"][2]', './csv/table'+str(i-1)+'.csv')
            r.dom('history.back()')

        # use wait() to wait for a number of seconds
        # default wait() is 5 seconds
        r.wait(5)
        # The state is not advanced here; the next pass sees terminateBot
        # and closes the session.
        self.terminateBot = True

      elif self.state == 10:
        # NOTE(review): this state never changes self.state, so if it is ever
        # entered the loop keeps navigating back until terminateBot is set
        # from elsewhere - confirm whether that is intended.
        r.dom('history.back()')
Esempio n. 10
0
                                account2box = account2boxid
                                searchbutton = searchbuttonid

                        if not r.exist('//*[@id="'+account1box+'"]'):
                            i+=1
                            continue
                        
                        #enter account number
                        r.type('//*[@id="'+account1box+'"]', "[clear]")
                        r.type('//*[@id="'+account1box+'"]', acc[0])
                        r.type('//*[@id="'+account2box+'"]', "[clear]")
                        r.type('//*[@id="'+account2box+'"]', acc[1])
                        
                        #click search
                        r.click('//*[@id="'+searchbutton+'"]')
                        while r.present('//*[@class="busy-load-container"'):
                            r.wait(2)
                        r.wait(2)

                        #account exist?
                        txt = r.read('body')
                        if "No accounts were found matching your search criteria" in txt:
                            runningLog(row[0] + ': Account does not exist')
                            break
                            
                        #click last bill
                        if r.present('Latest bill'):   #Billing history has no this button, download link displayed already
                            r.click('Latest bill')
                            r.wait(1)
                            while r.present('//*[@class="busy-load-container"'):
                                r.wait(2)