Beispiel #1
0
def download_stay_picinfo():
    """Download the host photos of the top 5 stays as local JPEG files.

    Photos are saved as ``data/<stay number>/<photo number>.jpg`` (at most 15
    per stay).  Mostly useful for local image classification; classifying by
    URL avoids the download entirely.

    A listing whose photo viewer does not expose an image (an Airbnb Plus
    "whale" page with a different layout) is skipped exactly once and the
    next URL returned by ``get_stay_url`` is tried instead.  The loop stops
    early when the collected URLs run out rather than indexing past the end
    of the list.
    """
    image_src = '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img/@src'
    next_button = '/html/body/div[9]/div/div/div/div/div[3]/div/div[2]/button'
    close_button = '/html/body/div[9]/div/div/div/section/div/div[1]/div/button'

    print('Extracting Top 5 Stay Information..')
    url = get_stay_url()
    print('Downloading Pics uploaded by host..')
    i = 0  # stays fully processed so far
    k = 0  # listings skipped because of an unexpected page format
    while i < 5 and i + k < len(url):
        r.url(url[i + k])
        r.click('//*[@id="FMP-target"]')
        skipped = False
        j = 0
        while True:
            j += 1
            print(f'Downloading Homestay {i+1} Photo {j}')
            r.wait(0.4)
            #r.snap('//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img',f"data/{i+1}/{j}.jpg") #fastest but not perfect
            if not r.exist(image_src):
                # Detects Whales (Airbnb Plus spoils the format a lot).
                # Leave the photo loop immediately so the listing is counted
                # as skipped only once; staying in the loop used to rewind
                # the stay counter once per missing photo.
                k += 1
                skipped = True
                print("WHALE detected, adding one more loop..")
                break

            dl_link = r.read(image_src)
            r.download(dl_link, f'data/{i+1}/{j}.jpg')
            print(f'Homestay {i+1} Photo {j} downloaded!')

            # Max 15 photos
            if not r.exist(next_button) or j >= 15:
                break
            r.click(next_button)
        if not skipped:
            i += 1
    r.click(close_button)
    print('Done.')
Beispiel #2
0
def click(xpath, s):
    """Click *xpath* when it is available and advance the step counter.

    Args:
        xpath: element identifier handed to ``r.exist``, ``r.present`` and
            ``r.click``.
        s: current step/state number.

    Returns:
        ``s + 1`` when the element was found and clicked, otherwise ``s``
        unchanged (after printing a diagnostic message).
    """
    # Logical `and` short-circuits: `r.present` is not queried once
    # `r.exist` has already reported the element as missing.
    if r.exist(xpath) and r.present(xpath):
        r.click(xpath)
        return s + 1
    print("Couldn't find " + xpath + ' component')
    return s
Beispiel #3
0
def initialize():
    """Start the RPA session, open ``URL`` and click the submit button if shown."""
    submit_button = '//*/button[@type="submit"]'

    print('Initializing...')
    r.init()
    r.url(URL)
    if r.exist(submit_button):
        # Anti RPA by AirBnB
        r.click(submit_button)
    print('Done.')
Beispiel #4
0
def get_stay_url():
    """Return the links of the first ten listings on the current results page.

    Ten links are collected (rather than five) so callers have spares when a
    listing turns out to be an Airbnb Plus page.  Two page layouts are
    handled: one rooted at the ``FMP-target`` element and a fallback rooted
    at the ``itemList`` element.  Each entry is ``URL`` concatenated with the
    ``href`` read from the page.
    """
    fmp_root = '//*[@id="FMP-target"]/div/div/div/div'
    item_list_root = '//div[@itemprop="itemList"]/div[2]/div/div/div/div[2]/div/div/div/div'

    if r.exist(fmp_root + '/div[1]/div/div/div/div[2]/a'):
        root = fmp_root
    else:
        root = item_list_root

    # The first card keeps its link under div[2]; cards 2..10 keep it under div[1].
    links = [URL + r.read(root + '/div[1]/div/div/div/div[2]/a/@href')]
    for position in range(2, 11):
        links.append(
            URL + r.read(f'{root}/div[{position}]/div/div/div/div[1]/a/@href'))
    return links
Beispiel #5
0
def initialize():
    """Start the RPA session and reload ``URL`` until the submit button shows up.

    The page is requested again for as long as the submit button cannot be
    found; once it exists it is clicked.
    """
    submit_button = '//*/button[@type="submit"]'

    print('Initializing...')
    r.init()
    r.timeout(15)  # set timeout to wait longer
    r.url(URL)
    while True:
        if r.exist(submit_button):
            break
        r.url(URL)
        print("Wrong page detected, retrying..")
    # Anti RPA by AirBnB
    r.click(submit_button)
    print('Done.')
Beispiel #6
0
def logout():
    """Launch the EPEvenue desktop client and click through three image targets.

    Runs in visual-automation mode without the Chrome browser.  The clicks on
    ``a.png``, ``b.png`` and ``c.png`` are only performed when ``a.png`` is
    found on screen.
    """
    r.init(visual_automation=True, chrome_browser=False)
    # Raw string: '\S' and '\E' are not valid escape sequences, so the plain
    # literal only worked by accident and triggers an invalid-escape warning
    # on recent Python versions.  The resulting path is unchanged.
    r.run(r'C:\Servyou\EPPortal_DS3.0\EPEvenue_SH.exe')

    if r.exist("a.png"):
        r.click("a.png")
        r.click("b.png")
        r.click("c.png")
    # 13s  (original author's note; presumably the observed run time -- unverified)
Beispiel #7
0
def login():
    """Launch WXWork via visual automation and print whether login succeeded.

    After starting the application and clicking ``h.jpg`` and ``3.png``, the
    function checks for ``aa.png``.  When it is found, a snapshot is taken
    and, after a short wait, the presence of ``step-1.png`` decides between
    the "login success" and "login fail" messages.  Nothing is returned.
    """
    r.init(visual_automation=True, chrome_browser=False)
    # Raw string: '\s' and '\W' are not valid escape sequences, so the plain
    # literal only worked by accident and triggers an invalid-escape warning
    # on recent Python versions.  The resulting path is unchanged.
    r.run(r'E:\soft_isruning_position\WXWork\WXWork.exe')
    r.click("h.jpg")
    r.click("3.png")
    r.wait(1.5)

    is_true = r.exist("aa.png")
    print(is_true)
    if is_true:
        r.snap(
            "step-6.png",
            filename_to_save=
            r"D:\projects\S_Git_proj\spider\Other\spider_all\Rpa-Python\image")
        print("截图")  # message text means "screenshot"
        print("进行图片传输")  # message text means "transferring the image"
        r.wait(1.5)

        is_true = r.exist('step-1.png')
        if is_true:
            print("login success")
        else:
            print("login fail")
Beispiel #8
0
def login_to_admin(admin_url, username, password):
    """Open *admin_url* and submit the login form when one is displayed.

    Args:
        admin_url: address of the admin page to open.
        username: text typed into the ``id_username`` input.
        password: text typed into the ``id_password`` input.

    When the username input is not found, the function only prints
    'Already logged in'.
    """
    username_field = '//input[@id="id_username"]'
    password_field = '//input[@id="id_password"]'
    submit_button = '//input[@type="submit"]'

    r.url(admin_url)

    if not r.exist(username_field):
        print('Already logged in')
        return

    print('Login')
    r.click(username_field)
    r.type(username_field, username)

    r.click(password_field)
    r.type(password_field, password)

    r.click(submit_button)
Beispiel #9
0
def enter_personnel(adult, child, infant):
    """Open the guest picker and press each "increase" stepper the given number of times.

    Args:
        adult: number of clicks on the adults stepper.
        child: number of clicks on the children stepper.
        infant: number of clicks on the infants stepper.

    Two page layouts are supported; which one is used depends on whether the
    first ``aria-label="increase value"`` element exists.
    """
    r.click('//*[@role="search"]/div/div/div[5]/div/button')
    print('Entering Personnel Information..')
    #r.click('//*[@id="filter-menu-chip-group"]/div[3]/*')
    if r.exist('(//*[@aria-label="increase value"])[1]'):
        steppers = (
            '(//*[@aria-label="increase value"])[1]',
            '(//*[@aria-label="increase value"])[2]',
            '(//*[@aria-label="increase value"])[3]',
        )
    else:
        steppers = (
            '//*[@aria-describedby="subtitle-label-stepper-adults"][2]',
            '//*[@aria-describedby="subtitle-label-stepper-children"][2]',
            '//*[@aria-describedby="subtitle-label-stepper-infants"][2]',
        )

    # Steppers are ordered adults, children, infants -- same as the counts.
    for stepper, clicks in zip(steppers, (adult, child, infant)):
        for _ in range(clicks):
            r.click(stepper)
    #r.click('//*[@id="filter-panel-save-button"]')
    print('Done.')
# Script section: open the Nintendo account page, navigate to the purchase
# history and prepare the XPath fragments used to address history entries.
# r.url('https://ec.nintendo.com/my/#/transactions/1')
r.url('https://ec.nintendo.com/my/#/')
# fixed pause after requesting the page (`t` is presumably the time module -- confirm against the imports)
t.sleep(7)

# click on purchase history if logged in
r.click(
    '/html/body/div[1]/div[2]/section/div[1]/section[2]/section/ul/li[3]/a/div[1]'
)
t.sleep(5)
r.timeout(300)
# set a maximum 5-minute timeout for user to login
# t.sleep(300)

# use exist() function with XPath to check if logged in
# NOTE(review): when the element is missing only a browser alert is raised;
# nothing here stops the script, so the click below still runs.
if not r.exist(
        '/html/body/div[1]/div[2]/section/div[1]/section[2]/div/div/section[1]/div[2]/div/div[1]/div[2]'
):
    r.dom('alert("Purchase History Page not detected after 5 minutes. Bye!")')

# then click on last item arrow
r.click(
    '/html/body/div[1]/div[2]/section/div[1]/section[2]/div/div/ni-pager/section/div/button[9]'
)

# field using css selector
# key_name1 = 'section.o_c-card-history:nth-child('
# key_name2 = ') > div:nth-child(2) > div:nth-child(1) > div:nth-child(1) > div:nth-child(2) > span:nth-child(1)'
# field using xpath
# The two halves are split at the `section[` index; presumably a section
# number is inserted between them by later code -- confirm where they are used.
key_name1 = '/html/body/div[1]/div[2]/section/div[1]/section[2]/div/div/section['
key_name2 = ']/div[2]/div/div[1]/div[2]'
Beispiel #11
0
def extract_stay_info_as_data():
    """Scrape text and picture data for the top 5 stays into a dict.

    Generates URLs/text in a dict instead of downloading files, which
    shortens upload/download time and keeps the result format unified.

    Returns:
        dict keyed by ``"0"`` .. ``"4"``.  Each value holds ``name``,
        ``description``, ``inventory``, ``price``, ``rating``, ``url`` and
        ``coordinates`` strings plus ``picurl`` / ``pictext`` lists with ten
        slots each (unused slots stay ``None``).  Slots that could not be
        filled because the collected URLs ran out keep their empty defaults.

    Raises:
        IndexError: if the map link read from the page contains no ``=`` or
            the price text contains no ``"Total"`` (the splits below index
            the second part unconditionally).
    """
    title_xpath = '//*[@itemprop="name"]/span/h1/span'
    room_details = '//*[@id="room"]/div[2]/div/div[2]/div/div/div[3]/div/div/div[1]/div/div'
    price_with_breakdown = '//*[@id="book_it_form"]/div[4]/div[2]'
    price_fallback = '//*[@id="book_it_form"]/div[2]'
    review_header = '//*[@data-heading-focus="review header"]/div'
    slide_image = '//img[@data-veloute="slideshow-image"]/@src'
    slide_caption = '//*[@data-veloute="slideshow-modal"]/div/div/div[2]/div[2]/div[2]/div[2]/div'
    next_button = '//button[@aria-label="Next"]'

    # One independent record per stay; the comprehension gives every record
    # its own picurl/pictext lists.
    data = {
        str(slot): {
            "name": "",
            "description": "",
            "inventory": "",
            "price": "",
            "rating": "",
            "picurl": [None] * 10,
            "pictext": [None] * 10,
            "url": "",
            "coordinates": "",
        }
        for slot in range(5)
    }

    print('Extracting Top 5 Stay Picture Information (10 Image Max)..')
    url = get_stay_url()
    i = 0  # stays extracted so far
    k = 0  # listings skipped because of an unexpected page format
    # Stop when the collected URLs are exhausted instead of raising
    # IndexError after too many skipped listings.
    while i < 5 and i + k < len(url):
        listing_url = url[i + k]
        r.url(listing_url)
        print(f'Extracting Text Data - Homestay {i+1}')
        if not r.exist(title_xpath):
            # Detects Whales (Airbnb Plus spoils the format a lot): retry the
            # same slot with the next URL.
            k += 1
            print("WHALE detected, adding one more loop..")
            continue

        stay = data[str(i)]
        stay["url"] = listing_url
        # Keep the text between the first '=' and the following '&' of the map link.
        stay["coordinates"] = r.read(
            '//*[@data-veloute="map/GoogleMap"]/div/div/div/div[2]/a/@href'
        ).split("=", 1)[1].split("&", 1)[0]

        stay["name"] = r.read(title_xpath)

        stay["description"] = r.read('//*[@href="#neighborhood"]/div')
        #data[str(i)]["description"]=data[str(i)]["description"].replace("\xa0"," ")

        # Four sibling detail cells joined with single spaces.
        stay["inventory"] = " ".join(
            r.read(f'{room_details}/div[{cell}]/div') for cell in range(1, 5))

        # Total Price: text after the first "Total" of whichever price
        # element is present.
        if r.present(price_with_breakdown):
            price_xpath = price_with_breakdown
        else:
            price_xpath = price_fallback
        stay["price"] = r.read(price_xpath).split("Total", 1)[1]

        if r.present(review_header):
            stay["rating"] = (r.read(review_header + '/div/@aria-label') +
                              " (" + r.read(review_header + '/span') + ")")
        else:
            stay["rating"] = "No Reviews Yet"

        r.click('//*[@data-veloute="hero-view-photos-button"]')
        j = 0
        while True:
            j += 1
            print(f'Extracting Picture Data - Homestay {i+1} Photo {j}')
            r.wait(0.4)
            #r.snap('//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img',f"data/{i+1}/{j}.jpg") #fastest but not perfect
            if r.exist(slide_image):
                stay["picurl"][j - 1] = r.read(slide_image)
                if r.present(slide_caption):
                    stay["pictext"][j - 1] = r.read(slide_caption)
                #r.download(dl_link,f'data/{i+1}/{j}.jpg')
                print(f'Homestay {i+1} Photo {j} extracted!')

            # At most 10 pictures per stay (matches the list sizes above).
            if not r.exist(next_button) or j >= 10:
                break
            r.click(next_button)
        i += 1
    #r.click('/html/body/div[9]/div/div/div/section/div/div[1]/div/button')
    print('Done.')

    return data
Beispiel #12
0
        if j != 0:
            if r.present(
                    f'(//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div/div/span/a/span)[2]'
            ):
                r.click(
                    f'(//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div/div/span/a/span)[2]'
                )
            else:
                r.click(
                    f'(//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div/div/span/a/span)[1]'
                )

        time.sleep(10)
        for i in range(1, 31):
            if r.exist(
                    f'//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/ul/li[{i}]/div/div/div/div[2]/div[1]/div/div[1]/div/div[1]/div/div/h4/span/a/@href'
            ) == False:
                break
            URL_list.append("https://www.yelp.com" + r.read(
                f'//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/ul/li[{i}]/div/div/div/div[2]/div[1]/div/div[1]/div/div[1]/div/div/h4/span/a/@href'
            ))
            print(URL_list)

    reviews_df = pd.DataFrame()

    for x in range(0, 30):
        if len(URL_list) <= x: break
        r.url(URL_list[x])
        time.sleep(10)
        if r.present(
                '//*[@id="wrap"]/div[3]/div/div/div/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div/div[2]/div[2]/p'
Beispiel #13
0
                rating = int(
                    r.read(
                        f'(//*[contains(@id,"review")]/div/div/span[1]/@class)[{i+i0}]'
                    ).replace("ui_bubble_rating bubble_", "")) / 10
            else:
                i0 = 1
                rating = int(
                    r.read(
                        f'(//*[contains(@id,"review")]/div/div/span[1]/@class)[{i+i0}]'
                    ).replace("ui_bubble_rating bubble_", "")) / 10

            author = r.read(
                f'(//*[@class="memberOverlayLink clickable"]/div[2])[{i}]/div[1]'
            )
            if r.exist(
                    f'(//*[@class="memberOverlayLink clickable"]/div[2])[{i}]/div[2]'
            ):
                author_loc = r.read(
                    f'(//*[@class="memberOverlayLink clickable"]/div[2])[{i}]/div[2]'
                )
            date = r.read(f'(//*[@class="ratingDate"]/@title)[{i}]')
            review = r.read(
                f'(//*[contains(@id,"review")]/div/div/div[2]/div/p)[{i}]')
            origin = "TripAdvisor"
            #print(name)
            #print(location)
            #print(address)
            #print(type)
            #print(rating)
            #print(author)
            #print(author_loc)
Beispiel #14
0
def initialize():
    """Start the RPA session, open ``URL`` and click the submit button if it exists."""
    r.init()
    r.url(URL)
    submit_button = '//*/button[@type="submit"]'
    if not r.exist(submit_button):
        return
    r.click(submit_button)  # Anti RPA by AirBnB
# Script section: walk the paginated eatery list, collect the listing links
# of every page and checkpoint them to one pickle file per page.
time.sleep(10)



# Number of result pages, read from the `data-page-number` attribute of the
# sixth pager link (presumably the last-page link -- confirm against the page).
maxpage = int(r.read('//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/div/a[6]/@data-page-number'))
for j in range (0,maxpage):
    # Links collected for page j only; the list is reset for every page.
    URL_list = []
    # From the second page on, advance the pager: click the second pager link
    # when present, otherwise the first.
    if j!=0: 
        if r.present(f'(//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/a)[2]'):
            r.click(f'(//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/a)[2]')
        else:
            r.click(f'(//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/a)[1]')

    time.sleep(10)
    # Read the i-th listing link until no further link exists (at most 99 per page).
    for i in range (1,100):
        if r.exist(f'(//*[@id="component_2"]/div/div[*]/span/div[1]/div[2]/div[1]/div/span/a/@href)[{i}]') == False: break
        URL_list.append("https://www.tripadvisor.com.sg" + r.read(f'(//*[@id="component_2"]/div/div[*]/span/div[1]/div[2]/div[1]/div/span/a/@href)[{i}]'))
        #print(URL_list)
        # The file is rewritten after every appended link; a page with no
        # links therefore produces no file at all.  Despite the .txt name the
        # content is a binary pickle.
        with open(f'url_list_{j}.txt', 'wb') as filehandle: pickle.dump(URL_list, filehandle)


#Load Bookmark
# listnumber selects which saved page file to load; iteminlist is not used in
# this section (presumably an index into the loaded list -- confirm below).
listnumber = 0
iteminlist = 0
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# files written by this script.
with open(f'url_list_{listnumber}.txt', 'rb') as filehandle: URL_list = pickle.load(filehandle)
print(URL_list)



#Get Reviews from URLs
# Default author location, set before the review extraction that follows.
author_loc=""
Beispiel #16
0
  def sigaaRPA(self):
    """Drive the SIGAA portal through a numbered sequence of steps.

    ``self.state`` selects the step executed on each pass of the loop:
    1 opens the portal, 2 types the credentials, 3-6 click through the
    navigation (each advances only when ``click`` finds its element),
    7 selects ``self.semester``, 8 submits, and 9 visits every course link
    and snapshots the grade-detail pages into ``./notas/s<semester>/``.
    Step 9 sets ``self.terminateBot``, which closes the session and ends
    the loop on the next pass.
    """
    r.init()
    r.timeout(30)

    while self.state > 0:

      if self.terminateBot:
        r.close()
        break

      elif self.state == 1:
        # use url('your_url') to go to web page, url() returns current URL
        r.url('https://sigaa.upb.edu.co/ssomanager/c/SSB')
        self.state += 1

      elif self.state == 2:
        # use type() to use the keyboard to write something
        # Logical `and` short-circuits instead of always evaluating both checks.
        if r.exist(X.username) and r.present(X.username):
          # SECURITY(review): credentials are hard-coded in source; they
          # should be moved to configuration or the environment.
          r.type(X.username, '000290164')
          r.type(X.password, 'Tandres1997_')
          self.state += 1
        else:
          print("Couldn\'t find Username and Password Components")
          # Start over from the portal page.
          self.state = 1

      elif self.state == 3:
        # use click() to click on an UI element or x, y location
        self.state = click(X.login, self.state)

      elif self.state == 4:
        ## click on "Estudiantes" (students)
        self.state = click(X.estudiantes, self.state)

      elif self.state == 5:
        ## click on "Seguimiento a la formación" (academic progress)
        self.state = click(X.seguimieto, self.state)

      elif self.state == 6:
        ## click on "Calificaciones parciales" (partial grades)
        self.state = click(X.calif, self.state)

      elif self.state == 7:
        ## select the semester whose grades should be shown
        r.select(X.semester, self.semester)
        self.state += 1

      elif self.state == 8:
        ## click the submit ("enviar") button
        r.click(X.enviar)
        self.state += 1

      elif self.state == 9:
        r.wait(2)
        numCursos = r.count('//*[@class="datadisplaytable"][2]/tbody/tr/td/a')
        # Course links are looked up in table rows 2 .. numCursos + 1.
        for i in range(2, numCursos + 2):
          tablexpath = '//*[@class="datadisplaytable"][2]/tbody/tr[' + str(i) + ']/td/a'
          if r.exist(tablexpath):
            r.click(tablexpath)
            r.wait(1)
            pagetitle = r.read('//div[@id="pagetitle"]')
            # Only the component grade-detail page is snapshotted.
            if pagetitle == 'Detalle de Calificación de Componente':
              materia = r.read('//*[@class="datadisplaytable"][1]/tbody/tr[5]/td[2]')
              print(materia)
              r.snap('page', './notas/s'+self.semester+'/'+ materia +'.png')
              # r.table('//table[@class="datadisplaytable"][2]', './csv/table'+str(i-1)+'.csv')
            r.dom('history.back()')

        # use wait() to wait for a number of seconds
        # default wait() is 5 seconds
        r.wait(5)
        self.terminateBot = True
      elif self.state == 10:
        # NOTE(review): step 9 never advances to 10, and this branch does not
        # change the state, so it would repeat if it were ever entered.
        r.dom('history.back()')
Beispiel #17
0
def extract_stay_info_as_data():
    """Scrape text and picture data for the top 5 stays into a dict.

    Generates URLs/text in a dict instead of downloading files, which
    shortens upload/download time and keeps the result format unified.

    Returns:
        dict keyed by ``"0"`` .. ``"4"``.  Each value holds ``name``,
        ``description``, ``inventory``, ``price``, ``rating`` and ``url``
        strings plus ``picurl`` / ``pictext`` lists with ten slots each
        (unused slots stay ``None``).  Slots that could not be filled because
        the collected URLs ran out keep their empty defaults.
    """
    title_xpath = '//*[@data-plugin-in-point-id="TITLE_DEFAULT"]/div/div/section/div/div/h1'
    overview_xpath = '//*[@data-plugin-in-point-id="OVERVIEW_DEFAULT"]/div/div/div/section/div/div/div/div/div'
    price_xpath = '//*[@data-plugin-in-point-id="BOOK_IT_SIDEBAR"]/div/div[2]/div/ul[2]/li/span[2]'
    rating_xpath = '//*[@data-plugin-in-point-id="REVIEWS_DEFAULT"]/div/div/section/div/div/div/h2/span[2]/span'
    slide_image = '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img/@src'
    slide_caption = '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div[2]/div/span/div/span'
    next_button = '//*[@aria-label="Next"]'

    # One independent record per stay; the comprehension gives every record
    # its own picurl/pictext lists.
    data = {
        str(slot): {
            "name": "",
            "description": "",
            "inventory": "",
            "price": "",
            "rating": "",
            "picurl": [None] * 10,
            "pictext": [None] * 10,
            "url": "",
        }
        for slot in range(5)
    }

    print('Extracting Top 5 Stay Picture Information (10 Image Max)..')
    url = get_stay_url()
    i = 0  # stays extracted so far
    k = 0  # listings skipped because of an unexpected page format
    # Stop when the collected URLs are exhausted instead of raising
    # IndexError after too many skipped listings.
    while i < 5 and i + k < len(url):
        listing_url = url[i + k]
        r.url(listing_url)
        print(f'Extracting Text Data - Homestay {i+1}')
        if not r.exist(title_xpath):
            # Detects Whales (Airbnb Plus spoils the format a lot): retry the
            # same slot with the next URL.
            k += 1
            print("WHALE detected, adding one more loop..")
            continue

        stay = data[str(i)]
        stay["url"] = listing_url
        stay["name"] = r.read(title_xpath)
        # Non-breaking spaces in the description are replaced by plain spaces.
        stay["description"] = r.read(overview_xpath).replace("\xa0", " ")
        stay["inventory"] = r.read(overview_xpath + '[2]')
        stay["price"] = r.read(price_xpath)  #Total Price
        if r.present(rating_xpath):
            stay["rating"] = r.read(rating_xpath)
        else:
            stay["rating"] = "No Reviews Yet"

        r.click('//*[@id="FMP-target"]')
        j = 0
        while True:
            j += 1
            print(f'Extracting Picture Data - Homestay {i+1} Photo {j}')
            r.wait(0.4)
            #r.snap('//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img',f"data/{i+1}/{j}.jpg") #fastest but not perfect
            if r.exist(slide_image):
                stay["picurl"][j - 1] = r.read(slide_image)
                if r.present(slide_caption):
                    stay["pictext"][j - 1] = r.read(slide_caption)
                #r.download(dl_link,f'data/{i+1}/{j}.jpg')
                print(f'Homestay {i+1} Photo {j} extracted!')

            # At most 10 pictures per stay (matches the list sizes above).
            if not r.exist(next_button) or j >= 10:
                break
            r.click(next_button)
        i += 1
    #r.click('/html/body/div[9]/div/div/div/section/div/div[1]/div/button')
    print('Done.')

    return data
Beispiel #18
0
                    try:
                        if os.path.exists(pdfFile):
                            os.remove(pdfFile)
                        i+=1
                        if i == 1:
                            account1box = account1boxid
                            account2box = account2boxid
                            searchbutton = searchbuttonid
                        else:
                            account1box = account1boxid_hist
                            account2box = account2boxid_hist
                            searchbutton = searchbuttonid_hist
                        

                        #try 2 types of box id
                        if not r.exist('//*[@id="'+account1box+'"]'):
                            if account1box == account1boxid:
                                account1box = account1boxid_hist
                                account2box = account2boxid_hist
                                searchbutton = searchbuttonid_hist
                            elif account1box == account1boxid_hist:
                                account1box = account1boxid
                                account2box = account2boxid
                                searchbutton = searchbuttonid

                        if not r.exist('//*[@id="'+account1box+'"]'):
                            i+=1
                            continue
                        
                        #enter account number
                        r.type('//*[@id="'+account1box+'"]', "[clear]")