def download_stay_picinfo(): #Downloads picture locally as jpg img, MIGHT be useful if we are doing local image classification....but I rather gcloud classify with url if needed
    """Download up to 15 host-uploaded photos for each of the top 5 stays.

    Photos are saved as data/<stay_number>/<photo_number>.jpg via r.download().
    Relies on get_stay_url() for the listing URLs and on the rpa module `r`.
    NOTE(review): structure reconstructed from a collapsed source line —
    confirm loop nesting against the original file.
    """
    print('Extracting Top 5 Stay Information..')
    url=[]
    url=get_stay_url()
    print('Downloading Pics uploaded by host..')
    i=0  # index of the stay currently processed (0..4)
    k=0  # offset compensating for skipped "whale" (Airbnb Plus) listings
    while (i<5):
        r.url(url[i+k])
        r.click('//*[@id="FMP-target"]')  # open the photo viewer
        j=0  # photo counter within the current stay
        while (1):
            j=j+1
            print(f'Downloading Homestay {i+1} Photo {j}')
            r.wait(0.4)  # let the slideshow image load before reading @src
            #r.snap('//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img',f"data/{i+1}/{j}.jpg") #fastest but not perfect
            if (r.exist('//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img/@src') == True):
                dl_link=r.read('//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img/@src')
                r.download(dl_link,f'data/{i+1}/{j}.jpg')
                print(f'Homestay {i+1} Photo {j} downloaded!')
            else:
                # Page layout did not match: presumably an Airbnb Plus
                # ("whale") listing. Undo this iteration and shift the
                # URL window by one so five normal stays are still fetched.
                i=i-1 #Detects Whales (Airbnb Plus spoils the format alot)
                k=k+1 #Compensating Constant k
                print("WHALE detected, adding one more loop..")
            if (r.exist('/html/body/div[9]/div/div/div/div/div[3]/div/div[2]/button') == False or j >= 15):
                break #Max 15 photos
            r.click('/html/body/div[9]/div/div/div/div/div[3]/div/div[2]/button')  # "next photo" arrow
        i=i+1
        r.click('/html/body/div[9]/div/div/div/section/div/div[1]/div/button')  # close the photo viewer
    print('Done.')
def click(xpath, s):
    """Click the element at *xpath* if it exists and is visible.

    Parameters:
        xpath: XPath selector of the element to click.
        s: the caller's current state-machine counter.

    Returns:
        s + 1 when the click was performed, otherwise s unchanged so the
        caller can retry the same state.
    """
    # 'and' short-circuits, so r.present() is only consulted when the
    # element exists; the original bitwise '&' always evaluated both.
    if r.exist(xpath) and r.present(xpath):
        r.click(xpath)
        return s + 1
    else:
        # Added the missing space after "find" in the diagnostic message.
        print('Couldn\'t find ' + xpath + ' component')
        return s
def initialize():
    """Start the browser session, open the landing page URL and dismiss
    AirBnB's anti-automation interstitial if it appears."""
    print('Initializing...')
    r.init()
    r.url(URL)
    # r.exist() returns a plain bool, so comparing '== True' was redundant.
    if r.exist('//*/button[@type="submit"]'):
        r.click('//*/button[@type="submit"]')  # Anti RPA by AirBnB
    print('Done.')
def get_stay_url():
    """Collect absolute listing URLs for the top search results.

    Returns a 10-slot list (extra slots cover Airbnb Plus listings that
    later get skipped). Two selector families are tried because the
    results page renders in two different layouts.
    """
    links = [None] * 10  #catching top 10 in case of airbnb plus
    first_fmp = '//*[@id="FMP-target"]/div/div/div/div/div[1]/div/div/div/div[2]/a'
    if r.exist(first_fmp) == True:
        # Layout A: results live under the FMP-target container.
        links[0] = URL + r.read('//*[@id="FMP-target"]/div/div/div/div/div[1]/div/div/div/div[2]/a/@href')
        for pos in range(2, 11):
            links[pos - 1] = URL + r.read(f'//*[@id="FMP-target"]/div/div/div/div/div[{pos}]/div/div/div/div[1]/a/@href')
    else:
        # Layout B: results live under the itemList schema container.
        links[0] = URL + r.read('//div[@itemprop="itemList"]/div[2]/div/div/div/div[2]/div/div/div/div/div[1]/div/div/div/div[2]/a/@href')
        for pos in range(2, 11):
            links[pos - 1] = URL + r.read(f'//div[@itemprop="itemList"]/div[2]/div/div/div/div[2]/div/div/div/div/div[{pos}]/div/div/div/div[1]/a/@href')
    return links
def initialize():
    """Start a browser session with a 15 s element timeout and keep
    reloading URL until the anti-RPA submit button appears, then click it."""
    print('Initializing...')
    r.init()
    r.timeout(15)  #set timeout to wait longer
    r.url(URL)
    submit_btn = '//*/button[@type="submit"]'
    while True:
        if r.exist(submit_btn) != False:
            break
        r.url(URL)
        print("Wrong page detected, retrying..")
    r.click(submit_btn)  # Anti RPA by AirBnB
    print('Done.')
def logout():
    """Close the EPPortal desktop client using image-based automation.

    Uses visual (screenshot-matching) automation only; no browser is
    launched. The a/b/c.png anchors must exist next to the script.
    """
    r.init(visual_automation=True, chrome_browser=False)
    # Raw string: '\S' and '\E' were invalid escape sequences (Python kept
    # them literally but warns); the raw form has the identical value.
    r.run(r'C:\Servyou\EPPortal_DS3.0\EPEvenue_SH.exe')
    # NOTE(review): nesting of the b/c clicks under the a.png check is
    # reconstructed from a collapsed line — confirm against the original.
    if r.exist("a.png"):
        r.click("a.png")
        r.click("b.png")
        r.click("c.png")  # 13s
    pass
def login():
    """Log in to the WXWork desktop client via image matching and report
    whether the login succeeded (prints "login success"/"login fail")."""
    r.init(visual_automation=True, chrome_browser=False)
    # Raw string: '\s' and '\W' were invalid escape sequences (value is
    # unchanged, but the raw form avoids the DeprecationWarning).
    r.run(r'E:\soft_isruning_position\WXWork\WXWork.exe')
    r.click("h.jpg")
    r.click("3.png")
    r.wait(1.5)
    is_true = r.exist("aa.png")
    print(is_true)
    if is_true:
        r.snap(
            "step-6.png",
            filename_to_save=r"D:\projects\S_Git_proj\spider\Other\spider_all\Rpa-Python\image")
        print("截图")  # runtime message: "screenshot taken"
        print("进行图片传输")  # runtime message: "transferring image"
        r.wait(1.5)
    # Verify the post-login anchor regardless of the snapshot branch.
    is_true = r.exist('step-1.png')
    if is_true:
        print("login success")
    else:
        print("login fail")
def login_to_admin(admin_url, username, password):
    """Open *admin_url* and, if the login form is shown, fill in
    *username*/*password* and submit; otherwise assume we are logged in."""
    r.url(admin_url)
    user_field = '//input[@id="id_username"]'
    if not r.exist(user_field):
        # No username input on the page -> session already authenticated.
        print('Already logged in')
        return
    print('Login')
    r.click(user_field)
    r.type(user_field, username)
    pass_field = '//input[@id="id_password"]'
    r.click(pass_field)
    r.type(pass_field, password)
    #print(element, password)
    r.click('//input[@type="submit"]')
def enter_personnel(adult, child, infant):
    """Open the guest picker and press each "+" stepper the requested
    number of times for adults, children and infants (in that order).

    Two selector families are supported because the picker renders in two
    different layouts.
    """
    r.click('//*[@role="search"]/div/div/div[5]/div/button')
    print('Entering Personnel Information..')
    #r.click('//*[@id="filter-menu-chip-group"]/div[3]/*')
    if r.exist('(//*[@aria-label="increase value"])[1]') == True:
        steppers = [
            '(//*[@aria-label="increase value"])[1]',
            '(//*[@aria-label="increase value"])[2]',
            '(//*[@aria-label="increase value"])[3]',
        ]
    else:
        steppers = [
            '//*[@aria-describedby="subtitle-label-stepper-adults"][2]',
            '//*[@aria-describedby="subtitle-label-stepper-children"][2]',
            '//*[@aria-describedby="subtitle-label-stepper-infants"][2]',
        ]
    # Same click order as before: all adults, then children, then infants.
    for selector, count in zip(steppers, (adult, child, infant)):
        for _ in range(count):
            r.click(selector)
    #r.click('//*[@id="filter-panel-save-button"]')
    print('Done.')
# r.url('https://ec.nintendo.com/my/#/transactions/1') r.url('https://ec.nintendo.com/my/#/') t.sleep(7) # click on purchase history if logged in r.click( '/html/body/div[1]/div[2]/section/div[1]/section[2]/section/ul/li[3]/a/div[1]' ) t.sleep(5) r.timeout(300) # set a maximum 5-minute timeout for user to login # t.sleep(300) # use exist() function with XPath to check if logged in if not r.exist( '/html/body/div[1]/div[2]/section/div[1]/section[2]/div/div/section[1]/div[2]/div/div[1]/div[2]' ): r.dom('alert("Purchase History Page not detected after 5 minutes. Bye!")') # then click on last item arrow r.click( '/html/body/div[1]/div[2]/section/div[1]/section[2]/div/div/ni-pager/section/div/button[9]' ) # field using css selector # key_name1 = 'section.o_c-card-history:nth-child(' # key_name2 = ') > div:nth-child(2) > div:nth-child(1) > div:nth-child(1) > div:nth-child(2) > span:nth-child(1)' # field using xpath key_name1 = '/html/body/div[1]/div[2]/section/div[1]/section[2]/div/div/section[' key_name2 = ']/div[2]/div/div[1]/div[2]'
def extract_stay_info_as_data():
    """Scrape text + picture metadata for the top 5 Airbnb stays.

    For each stay: name, description, inventory summary, total price,
    rating, map coordinates, listing URL, and up to 10 photo URLs with
    captions. Airbnb Plus ("whale") pages have a different layout and are
    skipped, compensated by the offset `k`.

    Returns:
        dict keyed "0".."4", each value a record dict (picurl/pictext are
        10-slot lists padded with None).
    """
    # One template per stay; the comprehension replaces five duplicated
    # dict literals and still gives each stay its own list objects.
    data = {
        str(n): {
            "name": "",
            "description": "",
            "inventory": "",
            "price": "",
            "rating": "",
            "picurl": [None] * 10,
            "pictext": [None] * 10,
            "url": "",
            "coordinates": ""
        }
        for n in range(5)
    }
    print('Extracting Top 5 Stay Picture Information (10 Image Max)..')
    url = get_stay_url()
    i = 0  # index of the stay being processed (0..4)
    k = 0  # offset compensating for skipped "whale" listings
    while i < 5:
        data[str(i)]["url"] = url[i + k]
        r.url(url[i + k])
        print(f'Extracting Text Data - Homestay {i+1}')
        if r.exist('//*[@itemprop="name"]/span/h1/span'):
            # lat/long taken out of the Google-Maps link query string.
            data[str(i)]["coordinates"] = r.read(
                '//*[@data-veloute="map/GoogleMap"]/div/div/div/div[2]/a/@href'
            ).split("=", 1)[1].split("&", 1)[0]
            data[str(i)]["name"] = r.read('//*[@itemprop="name"]/span/h1/span')
            data[str(i)]["description"] = r.read(
                '//*[@href="#neighborhood"]/div')
            #data[str(i)]["description"]=data[str(i)]["description"].replace("\xa0"," ")
            data[str(i)]["inventory"] = r.read(
                '//*[@id="room"]/div[2]/div/div[2]/div/div/div[3]/div/div/div[1]/div/div/div[1]/div'
            ) + " " + r.read(
                '//*[@id="room"]/div[2]/div/div[2]/div/div/div[3]/div/div/div[1]/div/div/div[2]/div'
            ) + " " + r.read(
                '//*[@id="room"]/div[2]/div/div[2]/div/div/div[3]/div/div/div[1]/div/div/div[3]/div'
            ) + " " + r.read(
                '//*[@id="room"]/div[2]/div/div[2]/div/div/div[3]/div/div/div[1]/div/div/div[4]/div'
            )
            # The booking form renders the total in one of two positions.
            if r.present('//*[@id="book_it_form"]/div[4]/div[2]'):
                data[str(i)]["price"] = r.read(
                    '//*[@id="book_it_form"]/div[4]/div[2]').split("Total", 1)[1]
            else:
                data[str(i)]["price"] = r.read(
                    '//*[@id="book_it_form"]/div[2]').split("Total", 1)[1]  #Total Price
            if r.present('//*[@data-heading-focus="review header"]/div'):
                data[str(i)]["rating"] = r.read(
                    '//*[@data-heading-focus="review header"]/div/div/@aria-label'
                ) + " (" + r.read(
                    '//*[@data-heading-focus="review header"]/div/span') + ")"
            else:
                data[str(i)]["rating"] = "No Reviews Yet"
            r.click('//*[@data-veloute="hero-view-photos-button"]')
            j = 0  # photo counter within this stay
            while True:
                j = j + 1
                print(f'Extracting Picture Data - Homestay {i+1} Photo {j}')
                r.wait(0.4)  # let the slideshow image load before reading
                #r.snap('//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img',f"data/{i+1}/{j}.jpg") #fastest but not perfect
                if r.exist('//img[@data-veloute="slideshow-image"]/@src'):
                    data[str(i)]["picurl"][j - 1] = r.read(
                        '//img[@data-veloute="slideshow-image"]/@src')
                    if r.present(
                            '//*[@data-veloute="slideshow-modal"]/div/div/div[2]/div[2]/div[2]/div[2]/div'
                    ):
                        data[str(i)]["pictext"][j - 1] = r.read(
                            '//*[@data-veloute="slideshow-modal"]/div/div/div[2]/div[2]/div[2]/div[2]/div'
                        )
                    print(f'Homestay {i+1} Photo {j} extracted!')
                    if not r.exist('//button[@aria-label="Next"]') or j >= 10:
                        break
                    r.click('//button[@aria-label="Next"]')
                else:
                    # Slideshow image never appeared: bail out instead of
                    # spinning forever (previously an infinite loop).
                    break
        else:
            i = i - 1  #Detects Whales (Airbnb Plus spoils the format alot)
            k = k + 1  #Compensating Constant k
            print("WHALE detected, adding one more loop..")
        i = i + 1
        #r.click('/html/body/div[9]/div/div/div/section/div/div[1]/div/button')
    print('Done.')
    return data
if j != 0: if r.present( f'(//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div/div/span/a/span)[2]' ): r.click( f'(//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div/div/span/a/span)[2]' ) else: r.click( f'(//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div/div/span/a/span)[1]' ) time.sleep(10) for i in range(1, 31): if r.exist( f'//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/ul/li[{i}]/div/div/div/div[2]/div[1]/div/div[1]/div/div[1]/div/div/h4/span/a/@href' ) == False: break URL_list.append("https://www.yelp.com" + r.read( f'//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/ul/li[{i}]/div/div/div/div[2]/div[1]/div/div[1]/div/div[1]/div/div/h4/span/a/@href' )) print(URL_list) reviews_df = pd.DataFrame() for x in range(0, 30): if len(URL_list) <= x: break r.url(URL_list[x]) time.sleep(10) if r.present( '//*[@id="wrap"]/div[3]/div/div/div/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div/div[2]/div[2]/p'
rating = int( r.read( f'(//*[contains(@id,"review")]/div/div/span[1]/@class)[{i+i0}]' ).replace("ui_bubble_rating bubble_", "")) / 10 else: i0 = 1 rating = int( r.read( f'(//*[contains(@id,"review")]/div/div/span[1]/@class)[{i+i0}]' ).replace("ui_bubble_rating bubble_", "")) / 10 author = r.read( f'(//*[@class="memberOverlayLink clickable"]/div[2])[{i}]/div[1]' ) if r.exist( f'(//*[@class="memberOverlayLink clickable"]/div[2])[{i}]/div[2]' ): author_loc = r.read( f'(//*[@class="memberOverlayLink clickable"]/div[2])[{i}]/div[2]' ) date = r.read(f'(//*[@class="ratingDate"]/@title)[{i}]') review = r.read( f'(//*[contains(@id,"review")]/div/div/div[2]/div/p)[{i}]') origin = "TripAdvisor" #print(name) #print(location) #print(address) #print(type) #print(rating) #print(author) #print(author_loc)
def initialize():
    """Open a browser session on URL and dismiss AirBnB's anti-automation
    submit button when it is shown."""
    submit_btn = '//*/button[@type="submit"]'
    r.init()
    r.url(URL)
    if r.exist(submit_btn) == True:
        #Anti RPA by AirBnB
        r.click(submit_btn)
# Walk every TripAdvisor result page, collect each eatery's listing URL,
# and pickle one url_list_<page>.txt file per page; then reload page 0's
# list as the starting bookmark for the review-scraping pass.
# NOTE(review): nesting reconstructed from a collapsed source line —
# confirm placement of the inner time.sleep against the original.
time.sleep(10)  # let the first result page finish rendering
# Highest page number is read off the 6th pagination link.
maxpage = int(r.read('//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/div/a[6]/@data-page-number'))
for j in range(0, maxpage):
    URL_list = []
    if j != 0:
        # The "next page" anchor index varies between layouts.
        if r.present(f'(//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/a)[2]'):
            r.click(f'(//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/a)[2]')
        else:
            r.click(f'(//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/a)[1]')
        time.sleep(10)  # wait for the new page to load
    for i in range(1, 100):
        # Stop at the first missing row on this page.
        if r.exist(f'(//*[@id="component_2"]/div/div[*]/span/div[1]/div[2]/div[1]/div/span/a/@href)[{i}]') == False:
            break
        URL_list.append("https://www.tripadvisor.com.sg" + r.read(f'(//*[@id="component_2"]/div/div[*]/span/div[1]/div[2]/div[1]/div/span/a/@href)[{i}]'))
    #print(URL_list)
    # Persist this page's URLs so a crashed run can resume later.
    with open(f'url_list_{j}.txt', 'wb') as filehandle:
        pickle.dump(URL_list, filehandle)

#Load Bookmark
listnumber = 0  # which pickled page to resume from
iteminlist = 0  # which item within that page to resume from
with open(f'url_list_{listnumber}.txt', 'rb') as filehandle:
    URL_list = pickle.load(filehandle)
print(URL_list)

#Get Reviews from URLs
author_loc = ""  # reviewer location default when none is present
def sigaaRPA(self):
    """Drive the SIGAA student portal as a small state machine.

    States 1..8 log in and navigate to the partial-grades page for
    self.semester; state 9 screenshots every course's grade-detail page
    into ./notas/s<semester>/ and then sets self.terminateBot. The loop
    runs until terminateBot closes the browser.
    NOTE(review): indentation reconstructed from a collapsed source line —
    confirm nesting (especially inside state 9) against the original.
    """
    r.init()
    r.timeout(30)  # allow up to 30 s for each element lookup
    while(self.state > 0):
        if self.terminateBot:
            r.close()
            break
        elif self.state == 1:
            # use url('your_url') to go to web page, url() returns current URL
            r.url('https://sigaa.upb.edu.co/ssomanager/c/SSB')
            self.state = self.state + 1
        elif self.state == 2:
            # use type() to use the keyboard to write something
            # SECURITY NOTE(review): credentials are hard-coded below;
            # they should come from a secret store / environment variable.
            if r.exist(X.username) & r.present(X.username):
                r.type(X.username, '000290164')
                r.type(X.password, 'Tandres1997_')
                self.state = self.state + 1
            else:
                print("Couldn\'t find Username and Password Components")
                self.state = 1  # go back and reload the login page
        elif self.state == 3:
            # use click() to click on an UI element or x, y location
            self.state = click(X.login, self.state)
        elif self.state == 4:
            # click on "Estudiantes" (students)
            self.state = click(X.estudiantes, self.state)
        elif self.state == 5:
            # click on "Seguimiento a la formación" (academic follow-up)
            self.state = click(X.seguimieto, self.state)
        elif self.state == 6:
            # click on "Calificaciones parciales" (partial grades)
            self.state = click(X.calif, self.state)
        elif self.state == 7:
            # select which semester's grades to inspect
            r.select(X.semester, self.semester)
            self.state = self.state + 1
        elif self.state == 8:
            # press the submit ("enviar") button
            r.click(X.enviar)
            self.state = self.state + 1
        elif self.state == 9:
            tablexpath = ''
            r.wait(2)
            # one anchor per course in the second data-display table
            numCursos = r.count('//*[@class="datadisplaytable"][2]/tbody/tr/td/a')
            for i in range(2,numCursos+2):
                tablexpath = '//*[@class="datadisplaytable"][2]/tbody/tr['+ str(i) +']/td/a'
                if r.exist(tablexpath):
                    r.click(tablexpath)
                    r.wait(1)
                    pagetitle = r.read('//div[@id="pagetitle"]')
                    if pagetitle == 'Detalle de Calificación de Componente':
                        # course name, used as the screenshot file name
                        materia = r.read('//*[@class="datadisplaytable"][1]/tbody/tr[5]/td[2]')
                        print(materia)
                        r.snap('page', './notas/s'+self.semester+'/'+ materia +'.png')
                        # r.table('//table[@class="datadisplaytable"][2]', './csv/table'+str(i-1)+'.csv')
                    r.dom('history.back()')
                    # use wait() to wait for a number of seconds
                    # default wait() is 5 seconds
                    r.wait(5)
            self.terminateBot = True
        elif self.state == 10:
            r.dom('history.back()')
def extract_stay_info_as_data(
):  #Generates URL/text in dict instead, shorten time for upload/download, more unified
    """Scrape name, description, inventory, total price, rating, listing
    URL and up to 10 photo URLs + captions for the top 5 Airbnb stays.

    Airbnb Plus ("whale") pages use a different layout and are skipped,
    compensated via the offset `k`. Returns a dict keyed "0".."4".
    NOTE(review): indentation reconstructed from a collapsed source line —
    confirm the loop nesting against the original file.
    """
    # Five pre-built records; picurl/pictext are 10-slot None-padded lists.
    data = {
        "0": {
            "name": "", "description": "", "inventory": "", "price": "",
            "rating": "", "picurl": [None] * 10, "pictext": [None] * 10,
            "url": ""
        },
        "1": {
            "name": "", "description": "", "inventory": "", "price": "",
            "rating": "", "picurl": [None] * 10, "pictext": [None] * 10,
            "url": ""
        },
        "2": {
            "name": "", "description": "", "inventory": "", "price": "",
            "rating": "", "picurl": [None] * 10, "pictext": [None] * 10,
            "url": ""
        },
        "3": {
            "name": "", "description": "", "inventory": "", "price": "",
            "rating": "", "picurl": [None] * 10, "pictext": [None] * 10,
            "url": ""
        },
        "4": {
            "name": "", "description": "", "inventory": "", "price": "",
            "rating": "", "picurl": [None] * 10, "pictext": [None] * 10,
            "url": ""
        }
    }
    print('Extracting Top 5 Stay Picture Information (10 Image Max)..')
    url = []
    url = get_stay_url()
    i = 0  # index of the stay being processed (0..4)
    k = 0  # offset compensating for skipped "whale" listings
    while (i < 5):
        data[str(i)]["url"] = url[i + k]
        r.url(url[i + k])
        print(f'Extracting Text Data - Homestay {i+1}')
        if (r.exist(
                '//*[@data-plugin-in-point-id="TITLE_DEFAULT"]/div/div/section/div/div/h1'
        ) == True):
            data[str(i)]["name"] = r.read(
                '//*[@data-plugin-in-point-id="TITLE_DEFAULT"]/div/div/section/div/div/h1'
            )
            data[str(i)]["description"] = r.read(
                '//*[@data-plugin-in-point-id="OVERVIEW_DEFAULT"]/div/div/div/section/div/div/div/div/div'
            )
            # Replace non-breaking spaces so downstream text handling is uniform.
            data[str(i)]["description"] = data[str(i)]["description"].replace(
                "\xa0", " ")
            data[str(i)]["inventory"] = r.read(
                '//*[@data-plugin-in-point-id="OVERVIEW_DEFAULT"]/div/div/div/section/div/div/div/div/div[2]'
            )
            data[str(i)]["price"] = r.read(
                '//*[@data-plugin-in-point-id="BOOK_IT_SIDEBAR"]/div/div[2]/div/ul[2]/li/span[2]'
            )  #Total Price
            if r.present(
                    '//*[@data-plugin-in-point-id="REVIEWS_DEFAULT"]/div/div/section/div/div/div/h2/span[2]/span'
            ):
                data[str(i)]["rating"] = r.read(
                    '//*[@data-plugin-in-point-id="REVIEWS_DEFAULT"]/div/div/section/div/div/div/h2/span[2]/span'
                )
            else:
                data[str(i)]["rating"] = "No Reviews Yet"
            r.click('//*[@id="FMP-target"]')  # open the photo viewer
            j = 0  # photo counter within this stay
            while (1):
                j = j + 1
                print(f'Extracting Picture Data - Homestay {i+1} Photo {j}')
                r.wait(0.4)  # let the slideshow image load before reading
                #r.snap('//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img',f"data/{i+1}/{j}.jpg") #fastest but not perfect
                if (r.exist(
                        '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img/@src'
                ) == True):
                    data[str(i)]["picurl"][j - 1] = r.read(
                        '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img/@src'
                    )
                    if (r.present(
                            '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div[2]/div/span/div/span'
                    ) == True):
                        data[str(i)]["pictext"][j - 1] = r.read(
                            '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div[2]/div/span/div/span'
                        )
                    #r.download(dl_link,f'data/{i+1}/{j}.jpg')
                    print(f'Homestay {i+1} Photo {j} extracted!')
                    # Stop at the last slide or after 10 photos.
                    if (r.exist('//*[@aria-label="Next"]') == False or j >= 10):
                        break
                    r.click('//*[@aria-label="Next"]')
        else:
            i = i - 1  #Detects Whales (Airbnb Plus spoils the format alot)
            k = k + 1  #Compensating Constant k
            print("WHALE detected, adding one more loop..")
        i = i + 1
        #r.click('/html/body/div[9]/div/div/div/section/div/div[1]/div/button')
    print('Done.')
    return data
try: if os.path.exists(pdfFile): os.remove(pdfFile) i+=1 if i == 1: account1box = account1boxid account2box = account2boxid searchbutton = searchbuttonid else: account1box = account1boxid_hist account2box = account2boxid_hist searchbutton = searchbuttonid_hist #try 2 types of box id if not r.exist('//*[@id="'+account1box+'"]'): if account1box == account1boxid: account1box = account1boxid_hist account2box = account2boxid_hist searchbutton = searchbuttonid_hist elif account1box == account1boxid_hist: account1box = account1boxid account2box = account2boxid searchbutton = searchbuttonid if not r.exist('//*[@id="'+account1box+'"]'): i+=1 continue #enter account number r.type('//*[@id="'+account1box+'"]', "[clear]")