def monthyearnavigate(monthyear):
    """Advance the datepicker by clicking "Next" until its header shows `monthyear`."""
    header = '//*[@aria-roledescription="datepicker"]/div/div[1]/div[2]/div/div/div'
    shown = r.read(header)
    while shown != monthyear:
        r.click('//*[contains(@aria-label,"Next")]')
        shown = r.read(header)
def get_stay_url():
    """Collect up to 10 listing URLs from the current Airbnb search-results page.

    Two page layouts exist: the Airbnb Plus layout ("FMP-target") and the
    regular item list; the first hit decides which set of XPaths is used.
    Returns a list of 10 absolute URLs (unfilled slots stay None).
    """
    urls = [None] * 10  # catching top 10 in case of Airbnb Plus
    if r.exist('//*[@id="FMP-target"]/div/div/div/div/div[1]/div/div/div/div[2]/a'):
        urls[0] = URL + r.read('//*[@id="FMP-target"]/div/div/div/div/div[1]/div/div/div/div[2]/a/@href')
        for idx in range(2, 11):
            urls[idx - 1] = URL + r.read(f'//*[@id="FMP-target"]/div/div/div/div/div[{idx}]/div/div/div/div[1]/a/@href')
    else:
        urls[0] = URL + r.read('//div[@itemprop="itemList"]/div[2]/div/div/div/div[2]/div/div/div/div/div[1]/div/div/div/div[2]/a/@href')
        for idx in range(2, 11):
            urls[idx - 1] = URL + r.read(f'//div[@itemprop="itemList"]/div[2]/div/div/div/div[2]/div/div/div/div/div[{idx}]/div/div/div/div[1]/a/@href')
    return urls
def get_rpa(timeout=10):
    """Run a demo Google search for 'decentralization' and screenshot the results.

    NOTE(review): the ``timeout`` parameter is currently unused — confirm
    whether it was meant to be forwarded to ``r.timeout()``.
    """
    r.init()
    r.url('https://www.google.com')
    r.type('//*[@name="q"]', 'decentralization[enter]')
    print(r.read('result-stats'))
    r.snap('page', 'results.png')
    r.close()
def change_password(username, new_password):
    """Change a user's password through the Django admin UI.

    Navigates user list -> user record -> password form, types the new
    password twice, submits, and returns the success-banner text.
    """
    # Open the user model's change list from the admin index.
    r.click('//tr[@class="model-user"]/th/a')
    # The row whose name cell equals `username`; its preceding id cell holds the link.
    r.click(f'//td[text()="{username}"]/preceding-sibling::th[@class="field-id"]/a')
    # Jump to the dedicated password-change form.
    r.click('//a[@href="../password/"]')
    for field in ('//input[@id="id_password1"]', '//input[@id="id_password2"]'):
        r.click(field)
        r.type(field, new_password)
    r.click('//input[@type="submit"]')
    txt = r.read('//ul[@class="messagelist"]/li[@class="success"]')
    print('CLICK')
    return txt
def download_stay_picinfo():
    """Download host photos of the top 5 stays as local jpg files (max 15 each).

    Airbnb Plus ("whale") listings use a different page layout; when one is
    detected the stay counter is rolled back and an offset is added so five
    regular listings are still processed.
    """
    print('Extracting Top 5 Stay Information..')
    urls = get_stay_url()
    print('Downloading Pics uploaded by host..')
    stay = 0   # index of the stay being processed (0..4)
    skew = 0   # compensating offset for skipped Airbnb Plus listings
    while stay < 5:
        r.url(urls[stay + skew])
        r.click('//*[@id="FMP-target"]')  # open the photo viewer
        photo = 0
        while True:
            photo += 1
            print(f'Downloading Homestay {stay+1} Photo {photo}')
            r.wait(0.4)
            src_xpath = '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img/@src'
            if r.exist(src_xpath):
                r.download(r.read(src_xpath), f'data/{stay+1}/{photo}.jpg')
                print(f'Homestay {stay+1} Photo {photo} downloaded!')
            else:
                stay -= 1  # Detects Whales (Airbnb Plus spoils the format a lot)
                skew += 1
                print("WHALE detected, adding one more loop..")
            next_button = '/html/body/div[9]/div/div/div/div/div[3]/div/div[2]/button'
            if not r.exist(next_button) or photo >= 15:
                break  # max 15 photos
            r.click(next_button)
        stay += 1
        # Close the photo viewer before moving to the next listing.
        r.click('/html/body/div[9]/div/div/div/section/div/div[1]/div/button')
    print('Done.')
def today_dollar():
    """Read today's commercial USD/BRL quote from melhorcambio.com and return it."""
    r.init()
    r.url('https://www.melhorcambio.com/dolar-hoje')
    sleep(4)  # let the quote widget render
    rate = r.read('//*[@id="comercial"]')
    sleep(2)
    print(f'{rate} | {datetime.today()}')
    r.close()
    return rate
def dollar_cannadian():
    """Read the Canadian-dollar quote from melhorcambio.com and return it."""
    r.init()
    r.url('https://www.melhorcambio.com/dolar-hoje')
    sleep(4)
    # Switch the dock tab to the Canadian dollar before reading the quote.
    r.click('//*[@id="dock"]/ul/li[2]/a')
    sleep(4)
    rate = r.read('//*[@id="comercial"]')
    sleep(2)
    print(f'{rate} | {datetime.today()}')
    r.close()
    return rate
def do_web_auto(search_data):
    """Google the given term, screenshot the page, and return the result-stats text."""
    r.init()
    r.url('https://www.google.com')
    # '.gLFyf.gsfi' is the Google search box (CSS classes).
    r.click('.gLFyf.gsfi')
    r.type('.gLFyf.gsfi', f'{search_data}[enter]')
    r.snap('page', 'results.png')
    res = r.read('result-stats')
    r.close()
    return res
# Creates/edits a UUII record in a web form: opens the record URL, clicks "New",
# fills the floor ('BA') and postal code, saves, then assigns the CTO ("rd")
# and prints 'Gestionada' on success.
# NOTE(review): this line was collapsed to one physical line — the nesting of
# the successive `if` blocks is ambiguous without the original indentation;
# restore the original layout before refactoring.
def gestionar_uuii(uuii, codigoPostal, cto): r.url(uuii) r.timeout(120) if r.read('//*[@id="btnNew"]'): r.click('//*[@id="btnNew"]') r.timeout(120) if r.read('//*[@id="btnFormEditSave"]'): r.select('//*[@name="planta"]', 'BA') r.type('//*[@name="cp"]', codigoPostal) r.click('//*[@id="btnFormEditSave"]') if r.read('//*[@class="tableCell"]'): r.click('//*[@class="tableCell"]') r.click('//*[@id="btnRD"]') if r.read('//*[@id="btnFormRDSave"]'): r.timeout(120) r.select('//*[@name="rd"]', cto) r.wait(5) if r.read('//*[@id="ctoAddress_ctoName"]'): r.click('//*[@id="btnFormRDSave"]') if r.url('xxxx.do?dispatchMethod=rdSave'): print('Gestionada')
def extract_stay_info_as_data():
    """Scrape the top 5 Airbnb stays into one dict keyed "0".."4".

    Per stay: name, description, inventory, total price, rating, coordinates,
    listing url, and up to 10 photo urls with captions. Generates URL/text in
    a dict instead of downloads — shorter upload/download time, more unified.
    """
    # FIX: the five identical empty records were copy-pasted literals;
    # build them in a comprehension instead (same resulting structure).
    data = {
        str(n): {
            "name": "", "description": "", "inventory": "", "price": "",
            "rating": "", "picurl": [None] * 10, "pictext": [None] * 10,
            "url": "", "coordinates": ""
        }
        for n in range(5)
    }
    print('Extracting Top 5 Stay Picture Information (10 Image Max)..')
    url = get_stay_url()
    i = 0
    k = 0  # compensating offset for Airbnb Plus ("whale") listings
    while i < 5:
        data[str(i)]["url"] = url[i + k]
        r.url(url[i + k])
        print(f'Extracting Text Data - Homestay {i+1}')
        if r.exist('//*[@itemprop="name"]/span/h1/span'):
            # Coordinates come from the Google-Maps link's query string.
            data[str(i)]["coordinates"] = r.read(
                '//*[@data-veloute="map/GoogleMap"]/div/div/div/div[2]/a/@href'
            ).split("=", 1)[1].split("&", 1)[0]
            data[str(i)]["name"] = r.read('//*[@itemprop="name"]/span/h1/span')
            data[str(i)]["description"] = r.read('//*[@href="#neighborhood"]/div')
            # Four adjacent cells (guests/bedrooms/beds/baths) joined with spaces.
            data[str(i)]["inventory"] = " ".join(
                r.read(f'//*[@id="room"]/div[2]/div/div[2]/div/div/div[3]/div/div/div[1]/div/div/div[{n}]/div')
                for n in range(1, 5))
            # Total price lives in a different row depending on the form layout.
            if r.present('//*[@id="book_it_form"]/div[4]/div[2]'):
                data[str(i)]["price"] = r.read(
                    '//*[@id="book_it_form"]/div[4]/div[2]').split("Total", 1)[1]
            else:
                data[str(i)]["price"] = r.read(
                    '//*[@id="book_it_form"]/div[2]').split("Total", 1)[1]  # Total Price
            if r.present('//*[@data-heading-focus="review header"]/div'):
                data[str(i)]["rating"] = r.read(
                    '//*[@data-heading-focus="review header"]/div/div/@aria-label'
                ) + " (" + r.read('//*[@data-heading-focus="review header"]/div/span') + ")"
            else:
                data[str(i)]["rating"] = "No Reviews Yet"
            r.click('//*[@data-veloute="hero-view-photos-button"]')  # open photo viewer
            j = 0
            while True:
                j = j + 1
                print(f'Extracting Picture Data - Homestay {i+1} Photo {j}')
                r.wait(0.4)
                if r.exist('//img[@data-veloute="slideshow-image"]/@src'):
                    data[str(i)]["picurl"][j - 1] = r.read(
                        '//img[@data-veloute="slideshow-image"]/@src')
                    if r.present('//*[@data-veloute="slideshow-modal"]/div/div/div[2]/div[2]/div[2]/div[2]/div'):
                        data[str(i)]["pictext"][j - 1] = r.read(
                            '//*[@data-veloute="slideshow-modal"]/div/div/div[2]/div[2]/div[2]/div[2]/div')
                    print(f'Homestay {i+1} Photo {j} extracted!')
                if r.exist('//button[@aria-label="Next"]') == False or j >= 10:
                    break
                r.click('//button[@aria-label="Next"]')
        else:
            i = i - 1  # Detects Whales (Airbnb Plus spoils the format alot)
            k = k + 1  # Compensating Constant k
            print("WHALE detected, adding one more loop..")
        i = i + 1
    print('Done.')
    return data
# Input spreadsheet records into the rpachallenge website through web automation.
# XPaths obtained via Chrome: inspect the element > right-click on highlighted
# element > copy > copy xpath.
bot.init()
bot.url('http://www.rpachallenge.com')
# Start the clock.
bot.click('/html/body/app-root/div[2]/app-rpa1/div/div[1]/div[6]/button')
# (form label, spreadsheet column) pairs for the six plain-text fields.
field_map = [
    ('labelFirstName', 1),
    ('labelLastName', 2),
    ('labelCompanyName', 3),
    ('labelRole', 4),
    ('labelAddress', 5),
    ('labelEmail', 6),
]
# Loop through the records and input onto the website.
for row_idx in range(2, sheet.max_row + 1):
    for label, col in field_map:
        bot.type(f'//*[@ng-reflect-name="{label}"]', sheet.cell(row=row_idx, column=col).value)
    # Phone number must be typed as text.
    bot.type('//*[@ng-reflect-name="labelPhone"]', str(sheet.cell(row=row_idx, column=7).value))
    # Click submit.
    bot.click('/html/body/app-root/div[2]/app-rpa1/div/div[2]/form/input')
# Get result, take a screenshot and close the browser.
print('RPA Challenge result: ' + bot.read('/html/body/app-root/div[2]/app-rpa1/div/div[2]/div[2]'))
bot.snap('page', 'rpa-challenge-result.png')
bot.close()
import rpa as r

# init() starts TagUI (auto-downloaded on first run);
# defaults: init(visual_automation=False, chrome_browser=True).
r.init()
# url('your_url') navigates; url() returns the current URL.
r.url('https://ca.yahoo.com')
# type() sends keys to a UI element or x,y location;
# '[enter]' = enter key, '[clear]' = clear field.
r.type('search-box', 'github')
# read() fetches text from a UI element.
search_text = r.read('search-box')
print(search_text)
# click() clicks a UI element or x,y location (rclick/dclick also exist).
r.click('search-button')
r.wait(6.6)
# snap() saves a screenshot: 'page' = web page, 'page.png' = computer screen.
r.snap('page', 'results.png')
r.snap('logo', 'logo.png')
r.wait(4.4)
r.click('GitHub')
r.wait(10)
# Fragment of a Yelp scraper: iterates over Singapore area names (the
# commented-out entries below are the tail of an `area_list` literal defined
# before this chunk), opens the area's restaurant search URL, reads the page
# count, and pages through results by clicking the "next" arrow.
# NOTE(review): this chunk starts mid-list-literal and ends mid-loop — the
# surrounding code is outside this view; do not refactor in isolation.
#,'Lavender','Lim_Chu_Kang','Little_India','Macpherson','Mandai','Marine_Parade','Mount_Sophia','Mountbatten','Newton','Novena','Orchard' #,'Outram','Pasir_Panjang','Pasir_Ris','Paya_Lebar','Potong_Pasir','Pulau_Ubin','Punggol','Queenstown','Raffles_Place','Redhill','River_Valley' #,'Robertson_Quay','Seletar','Sembawang','Sengkang','Sentosa','Serangoon','Serangoon_Gardens','Siglap','Simei','Sixth_Avenue','Somerset','Tampines' #,'Tanglin','Tanglin_Halt','Tanjong_Pagar','Tanjong_Rhu','Telok_Blangah',] size = len(area_list) for a in range(size): URL = f'https://www.yelp.com/search?find_desc=Restaurants&find_loc=Singapore&l=p%3ASG-SG%3ASingapore%3A%3A{area_list[a]}' r.url(URL) time.sleep(10) URL_list = [] maxpage = int( r.read( '//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/div[1]/div[2]/span' ).replace("1 of ", "")) for j in range(0, maxpage): if j != 0: if r.present( f'(//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div/div/span/a/span)[2]' ): r.click( f'(//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div/div/span/a/span)[2]' ) else: r.click( f'(//*[@id="wrap"]/div[3]/div[2]/div/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div/div/span/a/span)[1]' )
import rpa as r

r.init()
r.url('https://cn.bing.com/')
r.type('//*[@name="q"]', 'decentralization[enter]')
# NOTE(review): 'result-stats' is a Google results element — confirm an
# equivalent identifier exists on the Bing results page.
print(r.read('result-stats'))
r.snap('page', 'results.png')
r.close()
# Fragment: reads the commercial USD/BRL quote from melhorcambio.com,
# screenshots the page, then starts composing an e-mail with the quote.
# NOTE(review): the chunk ends mid-setup (sender/password placeholders) — the
# actual SMTP send happens after this view; do not refactor in isolation.
# NOTE(review): `auto` (window handle) is defined outside this chunk,
# presumably pyautogui — verify.
from time import sleep from datetime import datetime # Email modules import smtplib from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.mime.base import MIMEBase from email import encoders r.init() r.url('https://www.melhorcambio.com/dolar-hoje') sleep(4) window = auto.getActiveWindow() window.maximize() dolar = r.read('//*[@id="comercial"]') sleep(2) r.snap('page', 'img3.png') sleep(2) # window.close() fecha todas as janelas do # navegador. Já o r.close() fecha apenas a # janela do bot. r.close() print(dolar) # Session of email text_email = f'Dolar hoje: {dolar}\nDay: {str(datetime.today())}' # email sender, password and destiny fromadr = '*****@*****.**' pwd = '###########'
# TripAdvisor Singapore restaurant-URL scraper: reads the last page number,
# pages through results, collects up to 99 listing hrefs per page, and pickles
# each page's URL list to url_list_{j}.txt.
# NOTE(review): line collapsed — whether the `with open(...)` pickle dump sits
# inside the page loop (per-page files suggest it does) is ambiguous without
# the original indentation; restore before refactoring.
import rpa as r import pandas as pd import time import pickle #Get URLs r.init() URL = f'https://www.tripadvisor.com.sg/Restaurants-g294265-Singapore.html' r.url(URL) time.sleep(10) maxpage = int(r.read('//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/div/a[6]/@data-page-number')) for j in range (0,maxpage): URL_list = [] if j!=0: if r.present(f'(//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/a)[2]'): r.click(f'(//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/a)[2]') else: r.click(f'(//*[@id="EATERY_LIST_CONTENTS"]/div[2]/div/a)[1]') time.sleep(10) for i in range (1,100): if r.exist(f'(//*[@id="component_2"]/div/div[*]/span/div[1]/div[2]/div[1]/div/span/a/@href)[{i}]') == False: break URL_list.append("https://www.tripadvisor.com.sg" + r.read(f'(//*[@id="component_2"]/div/div[*]/span/div[1]/div[2]/div[1]/div/span/a/@href)[{i}]')) #print(URL_list) with open(f'url_list_{j}.txt', 'wb') as filehandle: pickle.dump(URL_list, filehandle)
# Fragment: tail of a demand/USEP prediction function (usep_lr is a fitted
# regression model defined outside this view), followed by a script that
# scrapes the current half-hourly price row from emcsg.com and normalizes it
# into a typed list for a DataFrame.
# NOTE(review): this chunk starts mid-function (`return` with no visible `def`)
# and ends mid-statement (`DataFrame(data_list,` is unterminated) — the rest is
# outside this view; do not refactor in isolation.
usep_pred_arr = usep_lr.predict(np.array([[demand_predicted]])) usep_pred = usep_pred_arr[0][0] return demand_predicted, usep_pred print(get_period_nbr()) period_nbr = get_period_nbr() today = datetime.today() #os.chdir("D:") r.init() r.url('https://www.emcsg.com/marketdata/priceinformation') r.wait(5) print( r.read( '//table[@class="ptable realtimePriceTable"]//tr[@class="current"]')) data_list = r.read( '//table[@class="ptable realtimePriceTable"]//tr[@class="current"]' ).splitlines() data_list[0] = today.strftime('%d/%m/%Y') data_list[1] = get_period_nbr() data_list[2] = float(data_list[2]) data_list[3] = float(data_list[3]) data_list[4] = float(data_list[4]) data_list[6] = float(data_list[6]) if data_list[6] == 0: data_list.append(0) else: data_list.append(1) data_list = [data_list] df_current = DataFrame(data_list,
# Fragment: applies a filter on a Turkish tender ("İhale") site, counts result
# cards, and writes each tender's name, authority and detail text into an
# xlsx worksheet, then closes the workbook and the browser.
# NOTE(review): line collapsed — whether `workbook.close()` / `r.close()` sit
# inside or after the `for x` loop is ambiguous without the original
# indentation (after the loop is the only sane reading); `worksheet`/`workbook`
# are defined outside this view.
r.wait(1) r.click('pnlFiltreBtn') r.wait(3) total_items = r.count('div.col-sm-12') print("Toplam İhale Sayısı =",total_items) r.wait(3) for x in range(0, total_items): print(x+1,". İhale") worksheet.write(x+1, 0, x+1) ihale_adi = r.read('//*[@id="sonuclar"]/div[' + str(x+1) + ']/div/div/div/div[2]/div/div/p[1]') print(ihale_adi) worksheet.write(x+1, 1, ihale_adi) idare_adi = r.read('//*[@id="sonuclar"]/div[' + str(x+1) + ']/div/div/div/div[3]/div/div/p') print(idare_adi) worksheet.write(x+1, 2, idare_adi) ihale_detayi = r.read('//*[@id="sonuclar"]/div[' + str(x+1) + ']/div/div/div/div[2]/div/div/p[2]') print(ihale_detayi) worksheet.write(x+1, 3, ihale_detayi) workbook.close() r.close()
# Usable from a Jupyter notebook, a Python script or an interactive shell.
import rpa as r

# init() starts TagUI (auto-downloaded on first run);
# defaults: init(visual_automation=False, chrome_browser=True).
r.init()
# url('your_url') navigates; url() returns the current URL.
r.url('https://ca.yahoo.com')
# type() sends keys; '[enter]' = enter key, '[clear]' = clear field.
r.type('search-input', 'github')
# read() fetches text from a UI element.
search_text = r.read('search-input')
print(search_text)
# click() clicks a UI element or x,y location (rclick/dclick also exist).
r.click('search-button')
# wait() pauses for the given seconds (default 5).
r.wait(6.6)
# snap() saves a screenshot: 'page' = web page, 'page.png' = computer screen.
r.snap('page', 'results.png')
r.snap('logo', 'logo.png')
import rpa
import pyautogui

rpa.init(visual_automation=False, chrome_browser=True)
rpa.url('https://www.melhorcambio.com/dolar-hoje')
rpa.wait(5.0)
# Maximize the browser window via pyautogui before interacting.
window = pyautogui.getActiveWindow()
window.maximize()
# Convert an amount of 10 and read the resulting commercial quote.
rpa.type('//*[@id="original"]', '10')
pyautogui.sleep(2)
commercial_rate = rpa.read('//*[@id="comercial"]')
pyautogui.sleep(2)
print(commercial_rate)
window.close()
def urlSnap():
    """Google-search 'CEG', print the result-stats line, and screenshot the page."""
    r.init(visual_automation=True)
    r.url('https://www.google.com')
    r.type("q", "CEG[enter]")
    print(r.read('result-stats'))
    r.snap('page', 'results.png')
# State-machine bot for the SIGAA student portal: logs in (states 1-3),
# navigates Estudiantes > Seguimiento > Calificaciones (states 4-6), selects a
# semester and submits (7-8), then iterates the course table and screenshots
# each course's grade-detail page (state 9).
# NOTE(review): collapsed state machine whose `elif` is split across this chunk
# boundary; exact statement nesting (and the dangling state 10) cannot be
# reconstructed safely — restore original indentation before refactoring.
# NOTE(review): hard-coded credentials ('000290164' / password) should be moved
# out of source control.
def sigaaRPA(self): r.init() r.timeout(30) while(self.state > 0): if self.terminateBot: r.close() break elif self.state == 1: # use url('your_url') to go to web page, url() returns current URL r.url('https://sigaa.upb.edu.co/ssomanager/c/SSB') self.state = self.state + 1 elif self.state == 2: # use type() to use the keyboard to write something if r.exist(X.username) & r.present(X.username): r.type(X.username, '000290164') r.type(X.password, 'Tandres1997_') self.state = self.state + 1 else: print("Couldn\'t find Username and Password Components") self.state = 1 elif self.state == 3: # use click() to click on an UI element or x, y location self.state = click(X.login, self.state) elif self.state == 4: ## hace click en Estudiantes self.state = click(X.estudiantes, self.state) elif self.state == 5: ## Hace click en Seguimiento a la formación self.state = click(X.seguimieto, self.state) elif self.state == 6: ## hace click en Calificaciones parciales self.state = click(X.calif, self.state) elif self.state == 7: ## Selecciona el semestre del cual quiere mirar las notas r.select(X.semester, self.semester) self.state = self.state + 1 elif self.state == 8: ## se hace click en enviar r.click(X.enviar) self.state = self.state + 1 elif self.state == 9: tablexpath = '' r.wait(2) numCursos = r.count('//*[@class="datadisplaytable"][2]/tbody/tr/td/a') for i in range(2,numCursos+2): tablexpath = '//*[@class="datadisplaytable"][2]/tbody/tr['+ str(i) +']/td/a' if r.exist(tablexpath): r.click(tablexpath) r.wait(1) pagetitle = r.read('//div[@id="pagetitle"]') if pagetitle == 'Detalle de Calificación de Componente': materia = r.read('//*[@class="datadisplaytable"][1]/tbody/tr[5]/td[2]') print(materia) r.snap('page', './notas/s'+self.semester+'/'+ materia +'.png') # r.table('//table[@class="datadisplaytable"][2]', './csv/table'+str(i-1)+'.csv') r.dom('history.back()') # use wait() to wait for a number of seconds # default wait() is 5 seconds r.wait(5) self.terminateBot = True elif 
self.state == 10: r.dom('history.back()')
# Fragment of a WhatsApp-web bulk sender: opens the i-th chat URL, polls until
# the displayed phone number changes (max 5 retries), records unreachable
# numbers, then clicks the send/action button.
# NOTE(review): this chunk starts mid-loop (`i`, `list`, `tempNum`, `atmNumber`,
# `n`, the numErroneos* lists and `cantidadNoEnviados` are defined outside this
# view) — do not refactor in isolation. The name `list` shadows the builtin.
print("Posicion: " + str(i)) #print(list[i]) r.url(list[i]) reps=0 err=False while tempNum==atmNumber: reps+=1 if reps>5: break # checkWhatsError = r.read('//*[@id="fallback_block"]/div/a') # print('Error '+ checkWhatsError) # if checkWhatsError == 'Descargar': # print('Error found') # err=True # break tempNum = r.read('//*[@id="main_block"]/div[1]/h1/p/span') print(atmNumber + " ==? " + tempNum) if n ==0: break r.wait(0.2) #if err: # continue if reps >5: print('Atasco capturar este error') numErroneosURL.append(list[i]) numErroneos.append(atmNumber) cantidadNoEnviados +=1 continue atmNumber = tempNum r.click('//*[@id="action-button"]') r.wait(0.3)
import rpa as r

# Visual-automation mode without a browser: read() on .png arguments performs
# OCR on the screen region matching the snapshot image.
r.init(visual_automation=True, chrome_browser=False)
print(r.read('pdf_window.png'))
print(r.read('image_preview.png'))
r.hover('anchor_element.png')
# OCR a 400x200 region anchored at the current mouse position.
print(r.read(r.mouse_x(), r.mouse_y(), r.mouse_x() + 400, r.mouse_y() + 200))
r.close()
# Fragment of a purchase-history scraper: for each history page, reads every
# item's name and price (0.00 when the price element is missing) and writes
# them into an openpyxl worksheet.
# NOTE(review): this chunk depends on names defined outside this view
# (purchase_history_number, price_part1/price_part2, key_name1/key_name2,
# sheet, t) and ends mid-script (`minus_link` unused here) — do not refactor
# in isolation.
links = int(purchase_history_number) row = 1 for link in range(1, links + 1): t.sleep(1) total_items = int( r.count('section.o_c-card-history')) + 1 # total of items per page item = 1 t.sleep(1) while item < total_items: # key_full = key_name1 + str(item) + key_name2 # this f" is for new python 3.6 text format; it allows for changable variables inside the string by using {} key_full = f"/html/body/div[1]/div[2]/section/div[1]/section[2]/div/div/section[{str(item)}]" \ f"/div[2]/div/div[1]/div[2]" price_full = price_part1 + str(item) + price_part2 var = r.read(key_full) t.sleep(1) r.timeout(5) if not r.exist(price_full): var2 = '0.00' else: var2 = r.read(price_full) t.sleep(1) sheet.cell(row=row, column=1).value = var sheet.cell(row=row, column=2).value = var2 # you can add records to a list, then use append as well # sheet.append(list) print(var) row += 1 item += 1 minus_link = int(purchase_history_number) - link
import rpa as r
import pyautogui as p
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import pandas as pd

# Scrape today's commercial USD/BRL quote.
r.init()
r.url('https://www.melhorcambio.com/dolar-hoje')
p.sleep(4)
dollar_value = r.read('//*[@id="comercial"]')
r.close()

# Body of the e-mail (Portuguese: "today's dollar quote is ...").
texto_email = 'A cotação do dolar hoje ' + str(
    pd.Timestamp('today')) + ' é ' + dollar_value

# Sender address, password and recipient (placeholders).
de = '*****@*****.**'
senha = '*********'
para = '*****@*****.**'

# Set up the MIME envelope.
message = MIMEMultipart()
message['From'] = de
message['To'] = para
message['Subject'] = 'Cotação do dolar'  # e-mail subject line
def extract_stay_info_as_data():
    """Scrape the top 5 Airbnb stays into one dict keyed "0".."4".

    Per stay: name, description, inventory, total price, rating, listing url,
    and up to 10 photo urls with captions. Generates URL/text in a dict
    instead of downloads — shorter upload/download time, more unified.
    """
    # FIX: the five identical empty records were copy-pasted literals;
    # build them in a comprehension instead (same resulting structure).
    data = {
        str(n): {
            "name": "", "description": "", "inventory": "", "price": "",
            "rating": "", "picurl": [None] * 10, "pictext": [None] * 10,
            "url": ""
        }
        for n in range(5)
    }
    print('Extracting Top 5 Stay Picture Information (10 Image Max)..')
    url = get_stay_url()
    i = 0
    k = 0  # compensating offset for Airbnb Plus ("whale") listings
    while i < 5:
        data[str(i)]["url"] = url[i + k]
        r.url(url[i + k])
        print(f'Extracting Text Data - Homestay {i+1}')
        if r.exist('//*[@data-plugin-in-point-id="TITLE_DEFAULT"]/div/div/section/div/div/h1'):
            data[str(i)]["name"] = r.read(
                '//*[@data-plugin-in-point-id="TITLE_DEFAULT"]/div/div/section/div/div/h1')
            data[str(i)]["description"] = r.read(
                '//*[@data-plugin-in-point-id="OVERVIEW_DEFAULT"]/div/div/div/section/div/div/div/div/div')
            # Normalize non-breaking spaces in the scraped description.
            data[str(i)]["description"] = data[str(i)]["description"].replace("\xa0", " ")
            data[str(i)]["inventory"] = r.read(
                '//*[@data-plugin-in-point-id="OVERVIEW_DEFAULT"]/div/div/div/section/div/div/div/div/div[2]')
            data[str(i)]["price"] = r.read(
                '//*[@data-plugin-in-point-id="BOOK_IT_SIDEBAR"]/div/div[2]/div/ul[2]/li/span[2]')  # Total Price
            if r.present('//*[@data-plugin-in-point-id="REVIEWS_DEFAULT"]/div/div/section/div/div/div/h2/span[2]/span'):
                data[str(i)]["rating"] = r.read(
                    '//*[@data-plugin-in-point-id="REVIEWS_DEFAULT"]/div/div/section/div/div/div/h2/span[2]/span')
            else:
                data[str(i)]["rating"] = "No Reviews Yet"
            r.click('//*[@id="FMP-target"]')  # open the photo viewer
            j = 0
            while True:
                j = j + 1
                print(f'Extracting Picture Data - Homestay {i+1} Photo {j}')
                r.wait(0.4)
                if r.exist('//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img/@src'):
                    data[str(i)]["picurl"][j - 1] = r.read(
                        '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div/div/img/@src')
                    if r.present('//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div[2]/div/span/div/span'):
                        data[str(i)]["pictext"][j - 1] = r.read(
                            '//div[@data-testid="photo-viewer-slideshow-desktop"]/div/div/div/div[2]/div/span/div/span')
                    print(f'Homestay {i+1} Photo {j} extracted!')
                if r.exist('//*[@aria-label="Next"]') == False or j >= 10:
                    break
                r.click('//*[@aria-label="Next"]')
        else:
            i = i - 1  # Detects Whales (Airbnb Plus spoils the format alot)
            k = k + 1  # Compensating Constant k
            print("WHALE detected, adding one more loop..")
        i = i + 1
    print('Done.')
    return data
import rpa as r
import pandas as pd
import time

# Read all restaurant names in csv.
yelp = pd.read_csv("yelpindex.csv")
yelp["URL"].head
r.init()
# Fill up table with URLs.
# BUG FIX: the original loop was `for i in (0, len(yelp["URL"]))`, which
# iterates over the two-element tuple (0, N) — visiting only index 0 and then
# failing on index N — instead of every row. range() visits each row.
for i in range(0, len(yelp["URL"])):
    r.url(yelp["URL"][i])
    # Take the first Google result link; fall back to the second result block.
    if r.present("//*[@id='rso']/div[1]/div/div[1]/a/@href"):
        name = r.read("//*[@id='rso']/div[1]/div/div[1]/a/@href")
    else:
        name = r.read("//*[@id='rso']/div[2]/div/div[1]/a/@href")
    print(name)
    yelp["URL"][i] = name
    time.sleep(5)
yelp.to_csv("yelpindex_updated.csv")
yelp2TA = pd.read_csv("yelpindex_updated.csv")
yelp2TA.reset_index(drop=True)
yelp2TA["URL"].head
# Get Reviews
author_loc = ""
reviews_df = pd.DataFrame()
# if y == 9: init = 33
# Usable from a Jupyter notebook, a Python script or an interactive shell.
import rpa as r

# init() starts TagUI (auto-downloaded on first run);
# defaults: init(visual_automation=False, chrome_browser=True).
r.init()
# url('your_url') navigates; url() returns the current URL.
r.url('https://ca.yahoo.com')
# type() sends keys; '[enter]' = enter key, '[clear]' = clear field.
r.type('ybar-sbq', 'github')
# read() fetches text from a UI element.
search_text = r.read('ybar-sbq')
print(search_text)
# click() clicks a UI element or x,y location (rclick/dclick also exist).
r.click('ybar-search')
# wait() pauses for the given seconds (default 5).
r.wait(6.6)
# snap() saves a screenshot: 'page' = web page, 'page.png' = computer screen.
r.snap('page', 'results.png')
r.snap('logo', 'logo.png')
# Reveal every obfuscated exporter e-mail on the NZ foodsafety register,
# then dump the whole page text to a local file.
r.init(visual_automation = True)
r.url('https://www.foodsafety.govt.nz/registers-lists/exporters/index.htm?setup_file=exporters-ssi.setup.cgi&rows_to_return=20000&submit_search=Search')
time.sleep(10)
# Every result row of the exporters form; each row carries a show-email toggle.
row_xpath = '//form[@ACTION="/registers-lists/exporters/index.htm"]//tr'
email_toggle = '//a[@class="obf-show-email"]'
row_count = r.count(row_xpath)
print(row_count)
for idx in range(row_count):
    button = f"{row_xpath}[{idx+1}]{email_toggle}"
    r.click(button)
    print(f"clicked {button}")
print('clicked all')
# NOTE(review): this sleep effectively blocks forever before the read below —
# confirm whether it was left in deliberately for manual intervention.
time.sleep(99999999)
result = r.read('page')
r.close()
target_file = '/Users/teemo/Downloads/www.foodsafety.govt.nz_registered_list.txt'
with open(target_file, "w") as f:
    f.write(result)