def main(url, mode='txt'):
    """Scrape every page of the CBIRC listing starting from *url*.

    Resumes from 'baojianhui_log.txt' when present (format:
    'page,list_index,item_index'), delegates per-page work to
    main_operation() and pages forward until the last page is done.

    url  -- listing URL prefix; the current page number is appended to it.
    mode -- passed through to main_operation ('txt', 'doc' or 'pdf').
    Returns True when all pages have been processed.
    """
    if os.path.exists('baojianhui_log.txt'):
        with open('baojianhui_log.txt', 'r', encoding='utf-8') as f:
            params = f.read().split(',')
            # BUG FIX: the checkpoint stores strings; the final
            # `curr_page == total_page` comparison below needs an int,
            # otherwise a run resumed on the last page never scrapes it.
            curr_page = int(params[0])
    else:
        # First run: start from page 1.
        curr_page = 1
    url_link = str(url) + str(curr_page)
    web_init(url_link)
    # Pager widget shows "current/total"; total is the last component.
    total_page = int(
        t.read(element_identifier='//div[@class = "ng-binding"][last()]').
        split('/')[-1])
    while int(curr_page) < int(total_page):
        # Scrape the current page, then advance the pager.
        main_operation(url, mode)
        print('click once')
        t.click(element_identifier='//a[@ng-click = "pager.next()"]')
        t.wait(5)
        curr_page = int(
            t.read(element_identifier='//div[@class = "ng-binding"][last()]').
            split('/')[0])
        # Checkpoint the new page; reset list/item indices to 1.
        with open('baojianhui_log.txt', 'w', encoding='utf-8') as f:
            f.write(str(curr_page) + ',' + str(1) + ',' + str(1))
    if curr_page == total_page:
        # Last page: one final pass, no page turn afterwards.
        main_operation(url, mode)
    t.close()
    return True
def multi_city_trip(enquiry):
    """Fill and submit Expedia's multi-destination flight form from *enquiry*."""
    t.click('//input[@id="flight-type-multi-dest-hp-flight"]')
    dates = enquiry["dates"]
    n_legs = len(dates)
    city_list = enquiry["city"]
    n_cities = len(city_list)
    # Number of flight-leg fieldsets currently rendered on the page.
    legs_on_form = (t.count(
        '//div[@class="cols-nested gcw-multidest-flights-container"]/div/fieldset'
    ))
    print(legs_on_form)
    if n_legs < legs_on_form:
        # Remove surplus leg rows, last one first.
        for k in range(legs_on_form - n_legs):
            t.click(
                f'//*[@id="flightlegs-list-fieldset-{legs_on_form-k}-hp-flight"]/div/a'
            )
    elif n_legs > legs_on_form:
        # Add the missing leg rows.
        for _ in range(n_legs - legs_on_form):
            t.click('//a[@id="add-flight-leg-hp-flight"]')
            t.wait(0.5)
    # First leg uses the plain origin/destination/departing fields.
    t.type('//input[@id="flight-origin-hp-flight"]', city_list[0])
    t.type('//input[@id="flight-destination-hp-flight"]', city_list[1])
    t.type('//input[@id="flight-departing-single-hp-flight"]', '[clear]')
    t.type('//input[@id="flight-departing-single-hp-flight"]',
           (dt.strptime(dates[0], '%d/%m/%Y')).strftime("%d/%m/%Y"))
    for leg in range(1, n_legs):
        print(f"num:{leg} and form_flightleg:{legs_on_form}")
        leg_date = dt.strptime(dates[leg], '%d/%m/%Y')
        origin = city_list[leg]
        if n_cities == n_legs:
            # Round-robin itinerary: the last leg returns to the first city.
            dest = city_list[leg + 1] if leg < n_legs - 1 else city_list[0]
        else:
            dest = city_list[leg + 1]
        t.type(f'//input[@id="flight-{leg+1}-origin-hp-flight"]', origin)
        t.wait(0.5)
        t.type(f'//input[@id="flight-{leg+1}-destination-hp-flight"]', dest)
        t.wait(0.5)
        t.type(f'//input[@id="flight-{leg+1}-departing-hp-flight"]', '[clear]')
        t.type(f'//input[@id="flight-{leg+1}-departing-hp-flight"]',
               leg_date.strftime("%d/%m/%Y"))
    t.click('//a[@id="flight-advanced-options-hp-flight"]')
    t.select('//select[@id="flight-advanced-preferred-class-hp-flight"]',
             lookup_cabin_class(enquiry["cabin_class"]))
    t.click('//*[@id="gcw-flights-form-hp-flight"]/div[8]/label/button')
def return_trip(enquiry):
    """Fill and submit Expedia's round-trip flight form from *enquiry*."""
    depart = dt.strptime(enquiry["dates"][0], '%d/%m/%Y')
    ret = dt.strptime(enquiry["dates"][1], '%d/%m/%Y')
    t.click('//input[@id="flight-type-roundtrip-hp-flight"]')
    t.type('//input[@id="flight-origin-hp-flight"]', enquiry["city"][0])
    t.type('//input[@id="flight-destination-hp-flight"]', enquiry["city"][1])
    # Clear the departure field before typing the formatted date.
    t.type('//input[@id="flight-departing-hp-flight"]', '[clear]')
    t.type('//input[@id="flight-departing-hp-flight"]',
           depart.strftime("%d/%m/%Y"))
    t.click('//*[@id="traveler-selector-hp-flight"]/div/ul/li/button')
    t.click('//a[@id="flight-advanced-options-hp-flight"]')
    t.select('//select[@id="flight-advanced-preferred-class-hp-flight"]',
             lookup_cabin_class(enquiry["cabin_class"]))
    t.click('//*[@id="gcw-flights-form-hp-flight"]/div[8]/label/button')
    tu.wait_for_pageload('//button[@id="flights-advanced-options-toggle"]')
    # Fix the return date on the results page if the form didn't keep it.
    shown_return = t.read('//input[@id="return-date-1"]')
    wanted_return = ret.strftime("%d/%m/%Y")
    if shown_return != wanted_return:
        t.type('//input[@id="return-date-1"]', '[clear]')
        t.type('//input[@id="return-date-1"]', wanted_return)
    t.click('//*[@id="flight-wizard-search-button"]')
def one_way_trip(enquiry):
    """Fill and submit Expedia's one-way flight form from *enquiry*."""
    depart = dt.strptime(enquiry["dates"][0], '%d/%m/%Y')
    t.click('//input[@id="flight-type-one-way-hp-flight"]')
    t.type('//input[@id="flight-origin-hp-flight"]', enquiry["city"][0])
    t.type('//input[@id="flight-destination-hp-flight"]', enquiry["city"][1])
    # Clear the departure field before typing the formatted date.
    t.type('//input[@id="flight-departing-single-hp-flight"]', '[clear]')
    t.type('//input[@id="flight-departing-single-hp-flight"]',
           depart.strftime("%d/%m/%Y"))
    t.click('//*[@id="traveler-selector-hp-flight"]/div/ul/li/button')
    t.click('//a[@id="flight-advanced-options-hp-flight"]')
    t.select('//select[@id="flight-advanced-preferred-class-hp-flight"]',
             lookup_cabin_class(enquiry["cabin_class"]))
    t.click('//*[@id="gcw-flights-form-hp-flight"]/div[8]/label/button')
def rpa_process(from_date, phone_number, token):
    """Cancel an appointment on the test site, then release the slot via API.

    from_date    -- datetime of the appointment to cancel
    phone_number -- customer's phone number used to look up the booking
    token        -- auth token forwarded to cancel_appointment_slot()
    """
    t.init()
    t.url("https://sangam-test-website.herokuapp.com/cancel_input")
    util.wait_for_pageload('//button[@id="btnsubmit"]')
    # Enter the phone number and submit the lookup form.
    t.click('//input[@id="txtHandNo"]')
    t.type('//input[@name="txtHandNo"]', phone_number)
    t.click('//button[@id="btnsubmit"]')
    util.wait_for_pageload('//button[@id="btnsubmit"]')
    appointment_dt = from_date                     # keep datetime for the API call
    date_label = from_date.strftime("%d/%m/%Y")    # label text shown on the page
    t.click('//label[contains(.,"' + str(date_label) + '")]')
    t.click('//button[@id="btnsubmit"]')
    t.close()
    cancel_appointment_slot(appointment_dt, token)
def flight_search(info):
    """Run an Expedia flight search for the request described by *info*."""
    t.url('https://www.expedia.com.sg/')
    tu.wait_for_pageload('//button[@id="tab-flight-tab-hp"]')
    t.click('//button[@id="tab-flight-tab-hp"]')
    # Trip-type specific fields (one-way / return / multi-city).
    fill_search(info)
    tu.wait_for_pageload('//button[@id="flights-advanced-options-toggle"]')
    t.click('//button[@id="flights-advanced-options-toggle"]')
    tu.wait_for_pageload('//select[@id="child-count"]')
    ages = info['child_age']
    # Traveller counts: adults, number of children, and each child's age.
    number_of_travellers(int(info['adult']), len(ages), ages)
    t.click('//*[@id="flight-wizard-search-button"]')
def get_shoe(shoe, g, email):
    """Search nike.com for *shoe*, optionally filter by gender, and return
    up to three results as a list of product dicts (or one 'NA' placeholder).
    """
    gender = g
    t.init(visual_automation=True)
    t.url('https://www.nike.com/sg/')
    t.type('//input[@id = "TypeaheadSearchInput"]', shoe + " shoes")
    t.click('//button[@class = "btn-search z2 bg-transparent"]')
    t.wait(3)
    # Apply the gender facet when one was requested (note the leading space).
    if gender == " men":
        t.click('(//span[contains(@class,"filter-item")])[1]')
    elif gender == " women":
        t.click('(//span[contains(@class,"filter-item")])[2]')
    t.wait(1)
    hits = t.count('//a[@class ="product-card__link-overlay"]')
    results = []
    if hits != 0:
        # Scrape at most the first three product cards.
        for idx in range(min(hits, 3)):
            pos = idx + 1
            card_name = t.read(f'(//a[@class = "product-card__link-overlay"])[{pos}]')
            card_price = t.read(f'(//div[@data-test="product-price"])[{pos}]')
            card_img = t.read(
                f'(//div[contains(@class, "product-card__hero")]/picture/img)[{pos}]/@src'
            )
            card_link = t.read(f'(//a[contains(@class,"product-card")])[{pos}]/@href')
            results.append({
                "email": email,
                "name": card_name,
                "price": card_price,
                "img": card_img,
                "Company": "Nike",
                "link": card_link
            })
    else:
        # No hits: return a single placeholder entry.
        results.append({
            "email": email,
            "name": "NA",
            "price": "NA",
            "img": "NA",
            "Company": "Nike",
            "link": "NA"
        })
    return results
def number_of_travellers(adult_pax, children_pax, children_age):
    """Set adult/child traveller counts and each child's age in the form."""
    print(f"Adults: {adult_pax} and Children: {children_pax}")
    # Adults.
    t.click('//select[@id="adult-count"]')
    t.select('//select[@id="adult-count"]', f'{adult_pax}')
    # Children.
    t.click('//select[@id="child-count"]')
    t.select('//select[@id="child-count"]', f'{children_pax}')
    # One age dropdown per child (loop is a no-op when children_pax == 0).
    for idx in range(children_pax):
        print(f'Child {idx+1} age {str(children_age[idx])}')
        t.click(f'//select[@id="child-age-{idx+1}"]')
        t.wait(1)
        t.select(f'//select[@id="child-age-{idx+1}"]', str(children_age[idx]))
        t.wait(1)
def main_operation(url, mode='txt'):
    """Scrape every regulation item on the current CBIRC list page.

    Walks each list block and each item inside it, saving each item as
    .txt (scraped page text), .doc or .pdf (site download) depending on
    *mode*.  Progress (page, list index, item index) is checkpointed to
    'baojianhui_log.txt' so an interrupted run can resume.

    url  -- base URL prefix; the current page number is appended to it
            when returning to the listing after each item.
    mode -- 'txt' (default), 'doc' or 'pdf'; anything else raises.
    """
    # Current page number, read from the "current/total" pager widget.
    curr_page = int(
        t.read(element_identifier='//div[@class = "ng-binding"][last()]').
        split('/')[0])
    # How many list blocks are on this page.
    list_count = t.count(
        element_identifier='//div[@class = "list caidan-right-list"]'
    )  # loop target: total number of lists
    # Resume from checkpoint data if a previous run was interrupted.
    if os.path.exists('baojianhui_log.txt'):
        with open('baojianhui_log.txt', 'r', encoding='utf-8') as f:
            params = f.read().split(',')
            curr_page = params[0]
            start_i = params[1]
            start_j = params[2]
    else:  # first run: start from the beginning
        start_i = 1
        start_j = 1
    # Regular scraping pass.
    for i in range(1, list_count + 1):
        t.wait(3)
        if i < int(start_i):  # skip lists finished in a previous run
            continue
        item_count = t.count(
            element_identifier='//div[@class = "list caidan-right-list"][' +
            str(i) + ']//div[@class = "panel-row ng-scope"]'
        )  # number of regulation items inside this list
        print('当前是list {}, 里面的元素有 {} 个'.format(str(i), str(item_count)))
        t.wait(3)
        for j in range(1, item_count + 1):
            if j < int(start_j):  # skip items finished in a previous run
                continue
            item_title = t.read(
                element_identifier='//div[@class = "list caidan-right-list"][' +
                str(i) + ']//div[@class = "panel-row ng-scope"][' + str(j) +
                ']//a')
            time_suffix = t.read(
                element_identifier='//div[@class = "list caidan-right-list"][' +
                str(i) + ']//div[@class = "panel-row ng-scope"][' + str(j) +
                ']//span[@class = "date ng-binding"]')
            file_name = item_title + '_' + time_suffix + '.txt'
            if '/' in file_name:  # '/' is illegal in a filename
                file_name = file_name.replace('/', ' ')
            if mode == 'txt':
                # Open the item's detail page and scrape its text.
                link = t.read(element_identifier=
                              '//div[@class = "list caidan-right-list"][' +
                              str(i) + ']//div[@class = "panel-row ng-scope"][' +
                              str(j) + ']//a/@ng-href')
                prefix = 'http://www.cbirc.gov.cn/cn/view/pages/'
                final_link = prefix + link
                t.url(final_link)
                t.wait(1)
                # The article body appears under one of three layouts;
                # try each and write whichever is non-empty.
                while not os.path.exists(file_name):
                    type_1 = t.read(
                        element_identifier='//div[@class = "Section0"]'
                    ) + t.read(element_identifier='//div[@class = "Section1"]')
                    type_2 = t.read(
                        element_identifier='//div[@class = "WordSection1"]')
                    type_3 = t.read(
                        element_identifier=
                        '//div[@class = "wenzhang-content ng-binding"]')
                    if type_1 != '':
                        content = type_1
                        with open(file_name, 'w', encoding='utf-8') as f:
                            f.write(content)
                        break
                    elif type_2 != '':
                        content = type_2
                        with open(file_name, 'w', encoding='utf-8') as f:
                            f.write(content)
                        break
                    elif type_3 != '':
                        content = type_3
                        with open(file_name, 'w', encoding='utf-8') as f:
                            f.write(content)
                        break
                    else:
                        # No recognised layout: write a placeholder so the
                        # item is not retried forever.
                        content = ' '
                        with open(file_name, 'w', encoding='utf-8') as f:
                            f.write(content)
                        break
            elif mode == 'doc':
                # Trigger the site's .doc download, wait for it, then rename.
                t.click(element_identifier=
                        '//div[@class = "list caidan-right-list"][' + str(i) +
                        ']//div[@class = "panel-row ng-scope"][' + str(j) +
                        ']//a[@ng-click = "fileDownload(x.docFileUrl)"]')
                doc_id = t.read(element_identifier=
                                '//div[@class = "list caidan-right-list"][' +
                                str(i) + ']//div[@class = "panel-row ng-scope"][' +
                                str(j) + ']//a/@ng-href').split('=')[1][:-7]
                doc_name = doc_id + '.doc'
                curr_clock = 5
                # Poll with growing waits until the file lands or MAX_WAIT.
                while not os.path.exists(doc_name):
                    t.wait(curr_clock)
                    curr_clock += 5
                    if curr_clock > MAX_WAIT:
                        break
                t.wait(2)
                os.rename(doc_name, item_title + '_' + time_suffix + '.doc')
            elif mode == 'pdf':
                # Trigger the site's .pdf download, wait for it, then rename.
                t.click(element_identifier=
                        '//div[@class = "list caidan-right-list"][' + str(i) +
                        ']//div[@class = "panel-row ng-scope"][' + str(j) +
                        ']//a[@ng-click = "fileDownload(x.pdfFileUrl)"]')
                pdf_id = t.read(element_identifier=
                                '//div[@class = "list caidan-right-list"][' +
                                str(i) + ']//div[@class = "panel-row ng-scope"][' +
                                str(j) + ']//a/@ng-href').split('=')[1][:-7]
                pdf_name = pdf_id + '.pdf'
                curr_clock = 5
                # Poll with growing waits until the file lands or MAX_WAIT.
                while not os.path.exists(pdf_name):
                    t.wait(curr_clock)
                    curr_clock += 5
                    if curr_clock > MAX_WAIT:
                        break
                t.wait(2)
                os.rename(pdf_name, item_title + '_' + time_suffix + '.pdf')
            else:
                print('unknown format..')
                t.close()
                raise Exception("unknown input mode")
            # Return to the main listing page.
            t.url(url + str(curr_page))
            t.wait(2)
            # Checkpoint: item j of list i on this page is done.
            with open('baojianhui_log.txt', 'w', encoding='utf-8') as f:
                f.write(str(curr_page) + ',' + str(i) + ',' + str(j))
        # Checkpoint: list i is finished; reset the item index to 1.
        with open('baojianhui_log.txt', 'w', encoding='utf-8') as f:
            f.write(str(curr_page) + ',' + str(i) + ',' + str(1))
def check_availability(reservation_date, reservation_time, party_size, restaurant_name):
    """Check on chope.co whether a table is available.

    reservation_date -- 'DD/MM/YYYY' string
    reservation_time -- 24-hour 'HHMM' string, e.g. '1930'
    party_size       -- number of diners (value for the 'adults' select)
    restaurant_name  -- restaurant to search for

    Returns 1 if available, 0 if not, or an apology string on any error.
    """
    try:
        # --- convert user-supplied values into the widget's formats ---
        reservation_day = reservation_date.split('/')[0]
        reservation_month = reservation_date.split('/')[1]
        reservation_month = int(reservation_month) - 1  # datepicker months are 0-based
        reservation_year = reservation_date.split('/')[2]
        reservation_time_int = int(reservation_time)
        start_time_hr = reservation_time[:2]
        if reservation_time_int > 1159:
            # Afternoon/evening: convert to 12-hour clock ('12' stays '12').
            if start_time_hr != "12":
                start_time_hr = int(start_time_hr) - 12
            start_time_option = str(start_time_hr) + ":" + reservation_time[2:4] + " pm"
        else:
            start_time_option = str(start_time_hr) + ":" + reservation_time[2:4] + " am"
        # --- booking parameters ---
        chope_url = 'https://www.chope.co/singapore-restaurants/category/restaurant/'
        t.init()
        t.url(chope_url)
        t.wait(10)
        # Date field: page forward through the datepicker until the day shows.
        t.click(f"(//span[contains(@class,'input-group-addon icon-calendar')])[1]")
        t.wait(7)
        boolean_flag = 1
        while boolean_flag:
            if t.present(f"//td[@data-handler='selectDay'and @data-year='{reservation_year}' and @data-month='{reservation_month}']/a[text()='{reservation_day}']"):
                t.click(f"//td[@data-handler='selectDay'and @data-year='{reservation_year}' and @data-month='{reservation_month}']/a[text()='{reservation_day}']")
                boolean_flag = 0
            else:
                t.click('//a[@title="Next"]')
                t.click(f"//td[@data-handler='selectDay'and @data-month='{reservation_month}']/a[text()='{reservation_day}']")
        # Time field
        t.select(f"//select[contains(@id,'time-field')]", start_time_option)
        # Number of diners field
        t.click(f"(//span[contains(@class,'input-group-addon icon-person')])[1]")
        t.select(f"//select[contains(@id,'adults')]", party_size)
        # Restaurant field
        t.type(f"//select[contains(@id,'sb-sel-restaurant')]", restaurant_name)
        t.click('//button[@id="btn-search"]')
        t.wait(5)
        # An error banner on the results page means no availability.
        if t.present(f"//div[@class='alert alert-danger']"):
            print('Not Available')
            return 0
        else:
            print('Available')
            return 1
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate; any automation failure is reported as unavailable.
        print('Error')
        return 'Reservation Unsuccessful. Unforunately, the restaurant was not able to accomodate your reservation.'
# use url('your_url') to go to web page, url() returns current URL t.url('https://ca.yahoo.com') # use type() to enter text into an UI element or x, y location # '[enter]' = enter key, '[clear]' = clear field t.type('search-box', 'github') # use read() to fetch and return text from UI element search_text = t.read('search-box') # use echo() to print to output, same as Python print() t.echo(search_text) # use click() to click on an UI element or x, y location # rclick() = right-click, dclick() = double-click t.click('search-button') # use wait() to wait for a number of seconds # default wait() is 5 seconds t.wait(6.6) # use snap() to save screenshot of page or UI element # page = web page, page.png = computer screen t.snap('page', 'results.png') t.snap('logo', 'logo.png') # another example of interacting with a web page # include http:// or https:// in URL parameter t.url('https://duckduckgo.com') t.type('search_form_input_homepage', 'The search engine that doesn\'t track you.')
def click(xpath):
    """Wait for the element at *xpath* to appear, then click it.

    Thin wrapper that serialises wait_element() + t.click() so callers
    never click before the element is rendered.
    """
    wait_element(xpath)
    t.click(xpath)
def getExpFlightPrice(airline, dep_ref, dur_ref):
    """On an Expedia results page, filter by airline and select the itinerary
    whose departure times/durations match the references; return (price, url).

    airline -- list of airline codes, one per leg
    dep_ref -- departure-time strings, one per leg; mutated in place
               (leading '0' stripped to match Expedia's display)
    dur_ref -- duration strings, one per leg; mutated in place
               (normalised to 'Xh Ym' form)

    Returns (0, '') when no matching flight is found.
    """
    print(airline)
    print(dep_ref)
    print(dur_ref)
    util.wait_for_pageload('//input[@classes="filter-checkbox"]')
    t.wait(3)
    t.click(f'//a[@data-content-id="airlineToggleContainer"]')
    for i in range(len(dep_ref)):
        # Tick the airline filter checkbox once per distinct airline.
        if i == 0:
            if t.present(f'//input[@id="airlineRowContainer_{airline[i]}"]'):
                t.wait(3)
                t.click(f'//input[@id="airlineRowContainer_{airline[i]}"]')
            else:
                print('Not match')
                return 0, ''
        elif airline[i] != airline[i-1]:
            if t.present(f'//input[@id="airlineRowContainer_{airline[i]}"]'):
                t.wait(1)
                t.click(f'//input[@id="airlineRowContainer_{airline[i]}"]')
            else:
                print('Not match')
                return 0, ''
        # Normalise references to Expedia's display format.
        if dep_ref[i][0] == '0':  # '08:30' -> '8:30'
            dep_ref[i] = dep_ref[i][1:]
        if dur_ref[i][-1:] == 'h':  # '5h' -> '5h 0m'
            dur_ref[i] = dur_ref[i] + ' 0m'
        else:  # '5h 30' -> '5h 30m'
            dur_ref[i] = dur_ref[i] + 'm'
    print(airline)
    print(dep_ref)
    print(dur_ref)
    util.wait_for_pageload('//button[@data-test-id="select-button"]')
    t.wait(5)
    # Scan the result list for the itinerary matching the references.
    for i in range(t.count(f'//ul[@id="flightModuleList"]//li')):
        i = i + 1  # XPath positions are 1-based
        print(i)
        dep = t.read(f'(//span[@class="medium-bold"]//span[@data-test-id="departure-time"])[{i}]')
        if len(dur_ref) == 1:
            # One-way: match on departure time only.
            if dep == dep_ref[0]:
                print('dep OK')
                # NOTE(review): `dur` is read but never compared in this
                # branch -- duration is not actually checked for one-way;
                # confirm intended.
                dur = t.read(f'(//span[@data-test-id="duration"])[{i}]')
                t.click(f'(//button[@data-test-id="select-button"])[{i}]')
                t.wait(5)
                if t.present('//a[@id="forcedChoiceNoThanks"]'):
                    t.click(f'//a[@id="forcedChoiceNoThanks"]')
                    t.wait(5)
                # Wait (up to ~25s) for the flight-information popup.
                for x in range(5):
                    print(x)
                    if t.popup('Flight-Information?'):
                        break
                    else:
                        t.wait(5)
                price = t.read(f'(//span[@class="packagePriceTotal"])[2]')
                price = float(price.replace(',', '').replace('SG', '').replace('$', '').replace(' ', ''))
                print(price)
                url = t.url()
                return price, url
            else:
                return 0, ''
        elif len(dur_ref) == 2:
            # Return trip: match the outbound, then scan for the inbound.
            print('trip', len(dur_ref))
            if dep == dep_ref[0]:
                print('dep OK')
                dur = t.read(f'(//span[@data-test-id="duration"])[{i}]')
                t.click(f'(//button[@data-test-id="select-button"])[{i}]')
                t.wait(5)
                util.wait_for_pageload('//button[@data-test-id="select-button"]')
                t.click(f'//input[@id="airlineRowContainer_{airline[1]}"]')
                t.wait(2)
                for j in range(t.count(f'//ul[@id="flightModuleList"]//li')):
                    j = j + 1
                    print(j)
                    # NOTE(review): '[{j}+1]' places the literal text "+1"
                    # inside the XPath index -- probably meant [{j+1}];
                    # confirm against the live page before changing.
                    dep = t.read(f'(//span[@data-test-id="departure-time"])[{j}+1]')
                    if dep == dep_ref[1]:
                        print('return dep ok')
                        dur = t.read(f'(//span[@data-test-id="duration"])[{j}+1]')
                        if dur == dur_ref[1]:
                            t.click(f'(//button[@data-test-id="select-button"])[{j}]')
                            t.wait(5)
                            if t.present('//a[@id="forcedChoiceNoThanks"]'):
                                t.click(f'//a[@id="forcedChoiceNoThanks"]')
                                t.wait(5)
                            # Wait (up to ~25s) for the popup.
                            for x in range(5):
                                print(x)
                                if t.popup('Flight-Information?'):
                                    break
                                else:
                                    t.wait(5)
                            util.wait_for_pageload('//h1[@class="section-header-main"]')
                            price = t.read(f'(//span[@class="packagePriceTotal"])[2]')
                            price = float(price.replace(',', '').replace('SG', '').replace('$', '').replace(' ', ''))
                            print(price)
                            url = t.url()
                            print(url)
                            return price, url
            else:
                return 0, ''
        elif len(dur_ref) >= 3:
            # Multi-city: compare all legs of row i at once.
            dep_lst = []
            dur_lst = []
            print('multi-trip ', len(dur_ref))
            for k in range(len(dur_ref)):
                dep_lst.append(t.read(f'(//span[@data-test-id="departure-time"])[{3*i+k+1}]'))
                dur_lst.append(t.read(f'(//span[@data-test-id="duration"])[{3*i+k+1}]'))
            print(dep_lst)
            print(dep_ref)
            if dep_lst == dep_ref:
                print(dur_lst)
                print(dur_ref)
                if dur_lst == dur_ref:
                    # NOTE(review): `j` is not defined in this branch (it is
                    # only bound in the 2-leg branch above) -- this line
                    # raises NameError when reached; likely should be `i`.
                    t.click(f'(//button[@data-test-id="select-button"])[{j}]')
                    t.wait(5)
                    if t.present('//a[@id="forcedChoiceNoThanks"]'):
                        t.click(f'//a[@id="forcedChoiceNoThanks"]')
                        t.wait(5)
                    # Wait (up to ~25s) for the popup.
                    for x in range(5):
                        print(x)
                        if t.popup('Flight-Information?'):
                            break
                        else:
                            t.wait(5)
                    price = t.read(f'(//span[@class="packagePriceTotal"])[2]')
                    price = float(price.replace(',', '').replace('SG', '').replace('$', '').replace(' ', ''))
                    print(price)
                    url = t.url()
                    print(url)
                    return price, url
                else:
                    return 0, ''
# Batch-selection loop: repeatedly reload the search form, apply the fixed
# date range and sale-type filters, then select up to SELECTION_LIMIT
# projects per batch until every project has been consumed.
# Relies on module-level state: PROCEED, batch_count, project_count, url,
# START_DATE, END_DATE, SELECTION_LIMIT.
while PROCEED == True:
    batch_count += 1
    t.url(url)
    print(f'\n-----start batch {batch_count}-----\n')
    # start date
    t.select('//select[@id="searchForm_selectedFromPeriodProjectName"]', START_DATE)
    # end date
    t.select('//select[@id="searchForm_selectedToPeriodProjectName"]', END_DATE)
    # type of sale: tick all three checkboxes
    t.click('//label[@for="checkbox1"]')
    t.click('//label[@for="checkbox2"]')
    t.click('//label[@for="checkbox3"]')
    project_total = t.count('//div[@id="projectContainerBox"]/a')
    # select projects (stop once every project has been taken)
    for _ in range(SELECTION_LIMIT):
        if project_count > project_total - 1:
            PROCEED = False
            break
        selected = t.read(f'//*[@id="addToProject_{project_count}"]')
        print(f'select {selected}')
        # NOTE(review): project_count is read but never incremented inside
        # this loop -- presumably updated elsewhere (code not visible here);
        # confirm, otherwise the same project is selected repeatedly.
        t.click(f'//*[@id="addToProject_{project_count}"]')
def gethistorylist(inputyear):
    """Scrape bank.jrj.com.cn wealth-product listings for year *inputyear*.

    Sorts the table ascending by issue date, pages forward collecting every
    row whose issue date falls within the year, then writes the result to
    '<year>.csv' and '<year>.xlsx'.

    inputyear -- year as a string, e.g. '2003'.
    """
    input = inputyear
    date_start = input + '-01-01'  # first day of the year
    date_end = input + '-12-31'    # last day of the year
    # Start the TagUI flow.
    t.init()
    # Open the listing page.
    t.url("http://bank.jrj.com.cn/bankpro/data.shtml?type=1")
    # Hover over the filter area and expand the condensed options.
    t.hover(element_identifier='//*[@id="fxr"]')
    t.click(element_identifier='//*[@id="zksq"]')
    # Move to the issue-date field, type the start date, then search.
    t.hover(element_identifier='//*[@id="fxr"]')
    t.click(element_identifier='//*[@id="fxr"]')
    t.type(element_identifier='//*[@id="fxr"]', text_to_type=date_start)
    # Click again so the datepicker does not cover the search button.
    t.click(element_identifier='//*[@id="fxr"]')
    t.hover(element_identifier='//*[@class="ipf01"]')
    t.click(element_identifier='//*[@class="ipf01"]')
    # Show 50 products per page.
    t.hover(element_identifier='//*[@data-pagesize="50"]')
    t.click(element_identifier='//*[@data-pagesize="50"]')
    # Sort ascending by issue date (equivalent to reading "backwards").
    t.hover(element_identifier='//*[@data-sort = "sell_org_date"]')
    t.click(element_identifier='//*[@data-sort = "sell_org_date"]')
    # Paging state while the "next page" link is not disabled.
    page_curr = 1      # current page index
    value_dict = {}    # scraped data, keyed by column name
    # Column names (Chinese, as they appear on the site).
    name_list = [
        '序号', '产品名称', '发行银行', '委托货币', '发行日', '停售日', '管理期(天)', '预期收益率',
        '到期收益率', '与同期储蓄比', '综合评级', 'url'
    ]
    for col_name in name_list:
        value_dict.setdefault(col_name, [])  # initialise empty columns
    # stop_flag becomes True once rows pass the end of the target year.
    stop_flag = False
    # Loop while the pager still marks page_curr current, or on page 1.
    while (t.read(element_identifier=
                  '//div[@id = "pagefoot"]//a[@class = "cur pf-disabled"]')
           == str(page_curr)) or (page_curr == 1):
        if stop_flag == True:  # past the year: no need to keep paging
            break
        # Rows on this page (+1 because the loop below is 1-based).
        count_values = int(
            t.count(element_identifier='//tbody[@id = "content"]//tr')
        ) + 1
        # Scrape the current page.
        for i in range(1, count_values):
            # Keep the row while its issue date is on/before Dec 31
            # (lexicographic compare works for 'YYYY-MM-DD' strings).
            if str(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']//td[@class = "px"]')) <= date_end:
                # sequence number
                value_dict[name_list[0]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[2]'))
                # product name
                value_dict[name_list[1]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[3]'))
                # issuing bank
                value_dict[name_list[2]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[4]'))
                # entrusted currency
                value_dict[name_list[3]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[5]'))
                # issue date
                value_dict[name_list[4]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[6]'))
                # sale-end date
                value_dict[name_list[5]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[7]'))
                # management period (days)
                value_dict[name_list[6]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[8]'))
                # expected yield
                value_dict[name_list[7]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[9]'))
                # realised yield at maturity
                value_dict[name_list[8]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[10]'))
                # ratio vs. same-term savings
                value_dict[name_list[9]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[11]'))
                # overall rating (tooltip of the icon)
                value_dict[name_list[10]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']//td[12]//i/@title'))
                # detail-page url
                value_dict[name_list[11]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']//a/@href'))
            else:
                # Row is beyond the target year: stop scraping entirely.
                stop_flag = True
                print("thread stops here..")
                break
        # Turn the page.
        page_curr += 1
        print("turn the page..")
        # Simulate mouse move, then click the next-page link.
        t.hover(element_identifier='//*[@href="' + str(page_curr) + '"]')
        t.click(element_identifier='//*[@href="' + str(page_curr) + '"]')
    # Close the TagUI flow.
    t.close()
    # Output files are named after the year.
    hist_data = pd.DataFrame(value_dict)
    # Dual output: csv + xlsx.
    hist_data.to_csv(input + ".csv", index=False, encoding='UTF-8')
    hist_data.to_excel(input + ".xlsx", index=False, encoding='UTF-8')


#gethistorylist('2003')
# Automated Gmail sign-in: credentials are collected in the terminal
# (switched to with alt-tab) so they are never hard-coded, then typed
# into Google's login form.
import tagui as t
import getpass

t.init(visual_automation = True, chrome_browser = True)
t.url('https://accounts.google.com/ServiceLogin/identifier?service=mail&passive=true&rm=false&continue=https%3A%2F%2Fmail.google.com%2Fmail%2F&ss=1&scc=1<mpl=default<mplcache=2&emr=1&osid=1&flowName=GlifWebSignIn&flowEntry=ServiceLogin')
t.wait(10)
if t.present('//*[@id="identifierId"]')==True:
    # Switch to the terminal, ask for the email id, switch back.
    t.keyboard('[alt][tab]')
    t.wait(4)
    emailid=t.ask("Enter Email ID")
    t.wait(10)
    t.keyboard('[alt][tab]')
    t.wait(2)
    t.type('//*[@id="identifierId"]',emailid)
    t.wait(3)
    t.click('//*[@id="identifierNext"]/span/span')
    t.wait(3)
    # Switch to the terminal, ask for the password (not echoed), switch back.
    t.keyboard('[alt][tab]')
    t.wait(4)
    password = getpass.getpass("Enter Password")
    t.wait(10)
    t.keyboard('[alt][tab]')
    t.wait(2)
    t.type('//*[@id="password"]/div[1]/div/div[1]/input',password)
    t.wait(3)
    t.click('//*[@id="passwordNext"]/span/span')
    t.wait(25)
    # Dismiss any first-login overlay.
    t.keyboard('[tab]')
    t.wait(3)
    t.keyboard('[esc]')
    t.wait(3)
def findAllFile(base): for root, ds, fs in os.walk(base): for f in fs: yield f t.close() logging.basicConfig(filename = "log.txt") srcDirectory = "OrgImage" t.init(visual_automation = True) for target in findAllFile(srcDirectory): target_image = 'OrgImage/' + target t.url('https://www.bing.com') t.click('//div[@id="sb_sbi"]/img') t.upload("input.fileinput",target_image) t.wait(3) succDownload = False image_nums = t.count('//a[@class="richImgLnk"]') print(image_nums) if t.click('//li[contains(string(),"Pages")]') == False: image_nums = 0 t.wait(3) for i in range(1, image_nums): if t.click(f'(//a[@class="richImgLnk"])[{i}]'):
def make_reservation(reservation_date, reservation_time, party_size, restaurant_name, first_name, last_name, email_address, phone_number):
    """Book a table on chope.co and schedule the follow-up reminder.

    reservation_date -- 'DD/MM/YYYY' string
    reservation_time -- 24-hour 'HHMM' string, e.g. '1930'
    party_size       -- number of diners (value for the 'adults' select)
    restaurant_name  -- restaurant to book
    first_name, last_name, email_address, phone_number -- guest details

    Returns a success/failure message string.
    """
    try:
        # --- convert user-supplied values into the widget's formats ---
        reservation_day = reservation_date.split('/')[0]
        reservation_month = reservation_date.split('/')[1]
        reservation_month = int(reservation_month) - 1  # datepicker months are 0-based
        reservation_year = reservation_date.split('/')[2]
        reservation_time_int = int(reservation_time)
        start_time_hr = reservation_time[:2]
        if reservation_time_int > 1159:
            # Afternoon/evening: convert to 12-hour clock ('12' stays '12').
            if start_time_hr != "12":
                start_time_hr = int(start_time_hr) - 12
            start_time_option = str(start_time_hr) + ":" + reservation_time[2:4] + " pm"
        else:
            start_time_option = str(start_time_hr) + ":" + reservation_time[2:4] + " am"
        # --- booking parameters ---
        chope_url = 'https://www.chope.co/singapore-restaurants/category/restaurant/'
        t.init()
        t.url(chope_url)
        t.wait(10)
        # Date field: page forward through the datepicker until the day shows.
        t.click(f"(//span[contains(@class,'input-group-addon icon-calendar')])[1]")
        t.wait(7)
        boolean_flag = 1
        while boolean_flag:
            if t.present(f"//td[@data-handler='selectDay'and @data-year='{reservation_year}' and @data-month='{reservation_month}']/a[text()='{reservation_day}']"):
                t.click(f"//td[@data-handler='selectDay'and @data-year='{reservation_year}' and @data-month='{reservation_month}']/a[text()='{reservation_day}']")
                boolean_flag = 0
            else:
                t.click('//a[@title="Next"]')
                t.click(f"//td[@data-handler='selectDay'and @data-month='{reservation_month}']/a[text()='{reservation_day}']")
        # Time field
        t.select(f"//select[contains(@id,'time-field')]", start_time_option)
        # Number of diners field
        t.click(f"(//span[contains(@class,'input-group-addon icon-person')])[1]")
        t.select(f"//select[contains(@id,'adults')]", party_size)
        # Restaurant field
        t.type(f"//select[contains(@id,'sb-sel-restaurant')]", restaurant_name)
        t.click('//button[@id="btn-search"]')
        t.wait(5)
        # Secondary page: confirm the timing slot for this restaurant.
        t.click(f"//a[contains(@rname,'{restaurant_name}') and text()='{start_time_option}']")
        t.wait(5)
        t.click(f"//input[@id='btn_sub' and @value='Book Now']")
        t.wait(5)
        # Booking confirmation popup window.
        t.popup('https://book.chope.co/')
        # Guest details.
        t.type('//input[@id="forename"]', first_name)
        t.type('//input[@id="surname"]', last_name)
        t.type('//input[@id="email"]', email_address)
        t.type('//input[@id="telephone"]', phone_number)
        # Agree to terms & conditions when the checkbox is shown.
        if t.present(f"//input[@name='agree_term_conditions']"):
            t.click(f"//input[@name='agree_term_conditions']")
        # Confirm the booking.
        t.click(f"//button[@id='check_book_now']")
        t.wait(5)
        t.close()
        print('Success')
        # NOTE(review): `sample_restaurant_address` is not defined in this
        # function or visible in this file -- confirm it is a module-level
        # global; otherwise this raises NameError (previously hidden by the
        # bare except below).
        schedule_reservation(reservation_date, reservation_time, party_size, restaurant_name, first_name, sample_restaurant_address)
        return 'Reservation Successful'
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate; any automation failure is reported to the caller.
        print('Error')
        return 'Reservation Unsuccessful. Unforunately, the restaurant was not able to accomodate your reservation.'
def getblanklist():
    """Collect rows whose issue date is blank ('--') from bank.jrj.com.cn.

    Sorts the table ascending by issue date so blank dates come first,
    pages forward collecting sequence number / rating / url for each blank
    row, snapshots every page table to '<n>blank_date.csv', writes the
    collected rows to 'blank_date.csv', and returns the number of pages
    visited (max_page).
    """
    # Start the TagUI flow.
    t.init()
    # Open the listing page.
    t.url("http://bank.jrj.com.cn/bankpro/data.shtml?type=1")
    # Search immediately, with no filter conditions.
    t.click(element_identifier='//*[@id="fxr"]')
    t.hover(element_identifier='//*[@class="ipf01"]')
    t.click(element_identifier='//*[@class="ipf01"]')
    # Show 50 products per page.
    t.hover(element_identifier='//*[@data-pagesize="50"]')
    t.click(element_identifier='//*[@data-pagesize="50"]')
    # Sort ascending by issue date so blank ('--') dates come first.
    t.hover(element_identifier='//*[@data-sort = "sell_org_date"]')
    t.click(element_identifier='//*[@data-sort = "sell_org_date"]')
    # Paging state while the "next page" link is not disabled.
    page_curr = 1  # current page index
    max_page = 1   # highest page reached
    # Column names and data store.
    value_dict = {}  # scraped data, keyed by column name
    name_list = ['序号', '综合评级', 'url']
    for col_name in name_list:
        value_dict.setdefault(col_name, [])  # initialise empty columns
    # stop_flag becomes True once the blank rows are exhausted.
    stop_flag = False
    # Loop while the pager still marks page_curr current, or on page 1.
    while (t.read(element_identifier=
                  '//div[@id = "pagefoot"]//a[@class = "cur pf-disabled"]')
           == str(page_curr)) or (page_curr == 1):
        if stop_flag == True:  # no more blank rows: stop paging
            break
        max_page = page_curr
        # Rows on this page (+1 because the loop below is 1-based).
        count_values = int(
            t.count(element_identifier='//tbody[@id = "content"]//tr')
        ) + 1
        # Snapshot the whole table of this page to its own csv.
        filename = str(page_curr) + "blank_date.csv"
        t.wait(1)  # wait a second in case the page loaded incorrectly
        t.table(
            element_identifier='//div[@class = "table-s1 tab-s2 w100"]//table',
            filename_to_save=filename)
        # Scrape the current page (sequence number, rating, href only).
        for i in range(1, count_values):
            # Keep the row only while its issue date is blank ('--').
            if str(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']//td[@class = "px"]')) == '--':
                # sequence number
                value_dict[name_list[0]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[2]'))
                # overall rating (tooltip of the icon)
                value_dict[name_list[1]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']//td[12]//i/@title'))
                # detail-page url
                value_dict[name_list[2]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']//a/@href'))
            else:
                # First non-blank date: all blank rows are done.
                stop_flag = True
                break
        # Turn the page.
        page_curr += 1
        # Simulate mouse move, then click the next-page link.
        t.hover(element_identifier='//*[@href="' + str(page_curr) + '"]')
        t.click(element_identifier='//*[@href="' + str(page_curr) + '"]')
    # Close the TagUI flow.
    t.close()
    # Write the collected rows to "blank_date.csv".
    hist_data = pd.DataFrame(value_dict)
    hist_data.to_csv("blank_date.csv", index=False, encoding='UTF-8')
    return max_page
def main_operation(url, mode='txt'):
    """Crawl one listing page of CBIRC regulation documents and save each item.

    For every list/item on the current page, saves the document either as a
    parsed .txt (mode='txt', reading the article body in several known page
    layouts), or downloads the .doc / .pdf attachment (mode='doc' / 'pdf').
    Progress (page, list index i, item index j) is checkpointed to
    'baojianhui_log.txt' so a crashed run can resume.

    NOTE(review): returns (True, '今日无增量') only when it hits an item dated
    before today; otherwise it falls off the end and returns None — callers
    should not rely on a uniform return shape.
    NOTE(review): MAX_WAIT, t, os and datetime are module-level names defined
    elsewhere in this file.
    """
    # Current page number, read from the "x/y" pager text
    curr_page = int(
        t.read(element_identifier='//div[@class = "ng-binding"][last()]').
        split('/')[0])
    # Number of list sections on the page
    list_count = t.count(
        element_identifier='//div[@class = "list caidan-right-list"]')
    # If resuming from a checkpoint, read the saved (page, i, j) triple
    if os.path.exists('baojianhui_log.txt'):
        with open('baojianhui_log.txt', 'r') as f:
            params = f.read().split(',')
            curr_page = params[0]  # NOTE(review): left as str here, unlike the int above
            start_i = params[1]
            start_j = params[2]
    else:
        # First run: start from the beginning
        start_i = 1
        start_j = 1
    # Regular processing: iterate lists, then items within each list
    for i in range(1, list_count + 1):
        t.wait(5)
        if i < int(start_i):  # skip lists already done before the crash
            continue
        # Number of regulation items in list i
        item_count = t.count(
            element_identifier='//div[@class = "list caidan-right-list"][' +
            str(i) + ']//div[@class = "panel-row ng-scope"]')
        print('当前是list {}, 里面的元素有 {} 个'.format(str(i), str(item_count)))
        t.wait(5)
        for j in range(1, item_count + 1):
            if j < int(start_j):  # skip items already done before the crash
                continue
            item_title = t.read(
                element_identifier='//div[@class = "list caidan-right-list"][' +
                str(i) + ']//div[@class = "panel-row ng-scope"][' + str(j) +
                ']//a')
            time_suffix = t.read(
                element_identifier='//div[@class = "list caidan-right-list"][' +
                str(i) + ']//div[@class = "panel-row ng-scope"][' + str(j) +
                ']//span[@class = "date ng-binding"]')
            # Incremental crawl: stop as soon as an item is not dated today
            if str(time_suffix) != str((datetime.datetime.today()).date()):
                print('今日增量已取完')
                return True, '今日无增量'
            # File name: "<title>_<date>.txt"; '/' is illegal in file names
            file_name = item_title + '_' + time_suffix + '.txt'
            if '/' in file_name:
                file_name = file_name.replace('/', ' ')
            if mode == 'txt':
                # Open the article's detail page
                link = t.read(element_identifier=
                              '//div[@class = "list caidan-right-list"][' +
                              str(i) + ']//div[@class = "panel-row ng-scope"][' +
                              str(j) + ']//a/@ng-href')
                prefix = 'http://www.cbirc.gov.cn/cn/view/pages/'
                final_link = prefix + link
                t.url(final_link)
                t.wait(1)
                # Try the known article layouts in order until the file exists.
                # Every branch ends in break, so this loop runs at most once.
                while not os.path.exists(file_name):
                    if t.read(
                            element_identifier='//div[@class = "Section0"]'
                    ) + t.read(element_identifier='//div[@class = "Section1"]'
                               ) != '':
                        # Layout 1: body split across Section0 + Section1 divs
                        p_counts_section0 = t.count(
                            element_identifier='//div[@class = "Section0"]//p')
                        content_list = []
                        with open(file_name, 'w', encoding='utf-8') as f:
                            f.write(file_name.split("_")[0] + "\n")
                        for p in range(1, p_counts_section0 + 1):
                            content_list.append(
                                t.read(element_identifier=
                                       '//div[@class = "Section0"]//p[' +
                                       str(p) + ']'))
                        with open(file_name, 'a', encoding='utf-8') as f:
                            f.writelines(
                                [content + "\n" for content in content_list])
                        # Append Section1 paragraphs
                        p_counts_section1 = t.count(
                            element_identifier='//div[@class = "Section1"]//p')
                        content_list = []
                        for p in range(1, p_counts_section1 + 1):
                            content_list.append(
                                t.read(element_identifier=
                                       '//div[@class = "Section1"]//p[' +
                                       str(p) + ']'))
                        with open(file_name, 'a', encoding='utf-8') as f:
                            f.writelines(
                                [content + "\n" for content in content_list])
                        break
                    elif t.read(
                            element_identifier='//div[@class = "WordSection1"]'
                    ) != '':
                        # Layout 2: a single WordSection1 div
                        p_counts = t.count(element_identifier=
                                           '//div[@class = "WordSection1"]//p')
                        if p_counts <= 1:
                            # No real <p> structure: split the blob on spaces
                            content_list = t.read(
                                element_identifier=
                                '//div[@class = "WordSection1"]//p')
                            with open(file_name, 'w', encoding='utf-8') as f:
                                f.write(file_name.split("_")[0] + "\n")
                                f.writelines([
                                    content + "\n"
                                    for content in content_list.split(" ")
                                ])
                        else:
                            content_list = []
                            for p in range(1, p_counts + 1):
                                content_list.append(
                                    t.read(
                                        element_identifier=
                                        '//div[@class = "WordSection1"]//p[' +
                                        str(p) + ']'))
                            with open(file_name, 'w', encoding='utf-8') as f:
                                f.write(file_name.split("_")[0] + "\n")
                                f.writelines([
                                    content + "\n" for content in content_list
                                ])
                        break
                    elif t.read(element_identifier=
                                '//div[@class = "wenzhang-content ng-binding"]'
                                ) != '':
                        # Layout 3: Angular article body; with <p> tags when
                        # p_counts > 1, otherwise split the blob on spaces
                        p_counts = t.count(
                            element_identifier=
                            '//div[@class = "wenzhang-content ng-binding"]//p')
                        if p_counts <= 1:
                            content_list = t.read(
                                element_identifier=
                                '//div[@class = "wenzhang-content ng-binding"]//p'
                            )
                            with open(file_name, 'w', encoding='utf-8') as f:
                                f.write(file_name.split("_")[0] + "\n")
                                f.writelines([
                                    content + "\n"
                                    for content in content_list.split(" ")
                                ])
                        else:
                            content_list = []
                            for p in range(1, p_counts + 1):
                                content_list.append(
                                    t.read(
                                        element_identifier=
                                        '//div[@class = "wenzhang-content ng-binding"]//p['
                                        + str(p) + ']'))
                            with open(file_name, 'w', encoding='utf-8') as f:
                                f.write(file_name.split("_")[0] + "\n")
                                f.writelines([
                                    content + "\n" for content in content_list
                                ])
                        break
                    else:
                        # Unknown layout: write a placeholder so the file exists
                        content = ' '
                        with open(file_name, 'w') as f:
                            f.write(content)
                        break
            elif mode == 'doc':
                # Trigger the .doc download, wait for it, then rename it
                t.click(element_identifier=
                        '//div[@class = "list caidan-right-list"][' + str(i) +
                        ']//div[@class = "panel-row ng-scope"][' + str(j) +
                        ']//a[@ng-click = "fileDownload(x.docFileUrl)"]')
                doc_id = t.read(element_identifier=
                                '//div[@class = "list caidan-right-list"][' +
                                str(i) +
                                ']//div[@class = "panel-row ng-scope"][' +
                                str(j) + ']//a/@ng-href').split('=')[1][:-7]
                doc_name = doc_id + '.doc'
                # Poll with growing waits until the download lands or MAX_WAIT
                curr_clock = 5
                while not os.path.exists(doc_name):
                    t.wait(curr_clock)
                    curr_clock += 5
                    if curr_clock > MAX_WAIT:
                        break
                t.wait(5)
                os.rename(doc_name, item_title + '_' + time_suffix + '.doc')
            elif mode == 'pdf':
                # Same as 'doc' but for the .pdf attachment
                t.click(element_identifier=
                        '//div[@class = "list caidan-right-list"][' + str(i) +
                        ']//div[@class = "panel-row ng-scope"][' + str(j) +
                        ']//a[@ng-click = "fileDownload(x.pdfFileUrl)"]')
                pdf_id = t.read(element_identifier=
                                '//div[@class = "list caidan-right-list"][' +
                                str(i) +
                                ']//div[@class = "panel-row ng-scope"][' +
                                str(j) + ']//a/@ng-href').split('=')[1][:-7]
                pdf_name = pdf_id + '.pdf'
                curr_clock = 5
                while not os.path.exists(pdf_name):
                    t.wait(curr_clock)
                    curr_clock += 5
                    if curr_clock > MAX_WAIT:
                        break
                t.wait(5)
                os.rename(pdf_name, item_title + '_' + time_suffix + '.pdf')
            else:
                print('unknown format..')
                t.close()
                raise Exception("unknown input mode")
            # Go back to the listing page for this result page
            t.url(url + str(curr_page))
            t.wait(5)
            # Checkpoint progress after each item
            with open('baojianhui_log.txt', 'w') as f:
                f.write(str(curr_page) + ',' + str(i) + ',' + str(j))
        # Current list finished: reset the item checkpoint j back to 1.
        # NOTE(review): this immediately overwrites the per-item write above.
        with open('baojianhui_log.txt', 'w') as f:
            f.write(str(curr_page) + ',' + str(i) + ',' + str(1))
def getdailyincrement(str_to_append):
    """Scrape today's newly-issued bank products from jrj.com.cn.

    Filters the listing by issue date == str_to_append (a 'YYYY-MM-DD' string),
    pages through the results collecting 序号 / 综合评级 / url for matching rows,
    dumps each kept page's raw table to '<n>daily_data.csv', and writes the
    collected columns to '<str_to_append>.csv'.

    Returns:
        int: the number of page tables dumped (count - 1).
    """
    # Initialize the browser session
    t.init()
    # Open the product listing page
    t.url("http://bank.jrj.com.cn/bankpro/data.shtml?type=1")
    # Wait 15 seconds for the page to load
    t.wait(15)
    # Hover, then expand the condensed filter options
    t.hover(element_identifier='//*[@id="fxr"]')
    t.click(element_identifier='//*[@id="zksq"]')
    # Move to the issue-date filter, click the text box, type today's date
    t.hover(element_identifier='//*[@id="fxr"]')
    t.click(element_identifier='//*[@id="fxr"]')
    t.type(element_identifier='//*[@id="fxr"]', text_to_type=str_to_append)
    # Click again so the date picker does not cover the search button below
    t.click(element_identifier='//*[@id="fxr"]')
    t.hover(element_identifier='//*[@class="ipf01"]')
    t.click(element_identifier='//*[@class="ipf01"]')
    # Set the page size to 50 products per page:
    t.hover(element_identifier='//*[@data-pagesize="50"]')
    t.click(element_identifier='//*[@data-pagesize="50"]')
    # Loop state, used while the "next page" control is not disabled
    page_curr = 1  # current page index
    value_dict = {}  # collected data
    count = 1  # sequence number for the per-page CSV dumps
    # Column names (runtime CSV headers — kept in Chinese on purpose)
    name_list = ['序号', '综合评级', 'url']
    for col_name in name_list:
        value_dict.setdefault(col_name, [])  # initialize empty dataset
    # Loop while we can still turn pages, or when there is only one page
    while (t.read(element_identifier=
                  '//div[@id = "pagefoot"]//a[@class = "cur pf-disabled"]') ==
           str(page_curr)) or (page_curr == 1):
        # Row count on this page (+1 because loops below start at 1)
        count_values = int(
            t.count(element_identifier='//tbody[@id = "content"]//tr')) + 1
        # If even the last row's issue date is after the target date, this
        # whole page is too new — turn the page without scraping it.
        # NOTE(review): lexicographic str comparison; safe only because the
        # dates are zero-padded 'YYYY-MM-DD'.
        if str(
                t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                       str(count_values - 1) +
                       ']//td[@class = "px"]')) > str_to_append:
            page_curr += 1
            t.hover(element_identifier='//*[@href="' + str(page_curr) + '"]')
            t.click(element_identifier='//*[@href="' + str(page_curr) + '"]')
            continue
        # Dump the whole visible table for this page
        filename = str(count) + "daily_data.csv"
        count += 1
        t.wait(1)  # wait 1 second in case the page mis-loaded
        t.table(
            element_identifier='//div[@class = "table-s1 tab-s2 w100"]//table',
            filename_to_save=filename)
        count_values = int(
            t.count(element_identifier='//tbody[@id = "content"]//tr')) + 1
        for i in range(1, count_values):
            # If the bottom row is already past today, stop scanning this page
            if str(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(count_values - 1) +
                           ']//td[@class = "px"]')) > str_to_append:
                break
            else:
                # Keep only rows issued exactly today
                if str(
                        t.read(element_identifier=
                               '//tbody[@id = "content"]//tr[' + str(i) +
                               ']//td[@class = "px"]')) == str_to_append:
                    # serial number
                    value_dict[name_list[0]].append(
                        t.read(element_identifier=
                               '//tbody[@id = "content"]//tr[' + str(i) +
                               ']/td[2]'))
                    # composite rating
                    value_dict[name_list[1]].append(
                        t.read(element_identifier=
                               '//tbody[@id = "content"]//tr[' + str(i) +
                               ']//td[12]//i/@title'))
                    # detail-page url
                    value_dict[name_list[2]].append(
                        t.read(element_identifier=
                               '//tbody[@id = "content"]//tr[' + str(i) +
                               ']//a/@href'))
                else:
                    # Not part of today's increment: do nothing
                    pass
        # Turn the page: simulate hover + click on the next page link
        page_curr += 1
        t.hover(element_identifier='//*[@href="' + str(page_curr) + '"]')
        t.click(element_identifier='//*[@href="' + str(page_curr) + '"]')
    # Close the TagUI session
    t.close()
    # Output file is named "<today's date>.csv"
    today_data = pd.DataFrame(value_dict)
    today_data.to_csv(str_to_append + ".csv", index=False, encoding='UTF-8')
    return count - 1
yesterday=today -datetime.timedelta(days=1) sdate=yesterday.strftime('%d/%m/%Y') edate=today.strftime('%d/%m/%Y') filename=today.strftime('%d.%m.%Y %H.%M.%S') directory=os.getcwd() t.url("https://corp.onlinesbi.com?saral/login.htm") t.wait(5) t.keyboard('[tab]') t.wait(1) t.keyboard('[esc]') t.wait(1) t.keyboard('[tab]') t.wait(1) t.keyboard('[esc]') t.wait(2) t.click('//*[@id="banking"]/div[2]/div/div/div/a') t.wait(4) t.click('//*[@id="banner"]/div[2]/a') t.wait(5) t.click('//*[@id="username"]') t.type('//*[@id="username"]','29KRISHNAJI') t.wait(4) t.type('//*[@id="label2"]','TUTAC2017%[enter]') t.wait(5) t.click('//*[@id="contentDiv"]/div[2]/div[2]/div[4]/a') path=r'C:\Users\garv2\Desktop\SBI\balance.jpeg'#PATH balance="balance"+str(filename) path=path.replace("balance",balance) t.snap('page',path) t.wait(3) t.click('//*[@id="navbar"]/div[1]/a[1]')
import tagui as t login = ['7038157994'] cont = len(login) aux = 0 while aux != cont: t.init() t.url( 'http://servicos.coelba.com.br/servicos-ao-cliente/Pages/login-av.aspx?UrlUc=http://servicos.coelba.com.br/servicos-ao-cliente/Pages/2-via-de-conta-coelba.aspx' ) t.click( 'ctl00$m$g_2d0a0930_51e9_4b08_addf_fccd4023f2e8$ctl00$txtContaContrato' ) t.type( 'ctl00$m$g_2d0a0930_51e9_4b08_addf_fccd4023f2e8$ctl00$txtContaContrato', login[aux]) captcha = t.read('textCaptcha') t.click('ctl00$m$g_2d0a0930_51e9_4b08_addf_fccd4023f2e8$ctl00$txtCaptcha') t.type('ctl00$m$g_2d0a0930_51e9_4b08_addf_fccd4023f2e8$ctl00$txtCaptcha', captcha) t.click( 'ctl00$m$g_2d0a0930_51e9_4b08_addf_fccd4023f2e8$ctl00$btnAutenticar') t.close() aux += 1
import tagui as t url = "https://sg.puma.com" t.init() t.url(url) t.click('//input[@id="search-submit"]') t.type('(//input[@id="search-inputor"])[1]','Courts FF')
import datetime import pandas as pd import os import s3_function #超参数 try: str_to_append = str(datetime.datetime.today().date()) # 初始化页面 t.init() # 输入url进入 t.url("http://bank.jrj.com.cn/bankpro/data.shtml?type=1") # 等15秒反应 t.wait(15) # 鼠标放上去,点击精简选项 t.hover(element_identifier='//*[@id="fxr"]') t.click(element_identifier='//*[@id="zksq"]') # 鼠标移动到发行日期上,点击文本栏,输入发行日日期为今日,点击搜索 t.hover(element_identifier='//*[@id="fxr"]') t.click(element_identifier='//*[@id="fxr"]') t.type(element_identifier='//*[@id="fxr"]', text_to_type=str_to_append) # 再点击,确保日期不会遮住底下的搜索按钮 t.click(element_identifier='//*[@id="fxr"]') t.hover(element_identifier='//*[@class="ipf01"]') t.click(element_identifier='//*[@class="ipf01"]') # 把展示的尺寸设置为50个产品每页: t.hover(element_identifier='//*[@data-pagesize="50"]') t.click(element_identifier='//*[@data-pagesize="50"]') # 当下一页没有被disable的时候,有以下超参数 page_curr = 1 # 当前页面index value_dict = {} # 存放data
def rpa_process(lmp_date, doctor_name, preferred_time, phone_number, patient_name, symptoms, email, sub_id): hour = preferred_time.hour minute = preferred_time.minute checkup_dates = [] day_list = [ 45, 75, 105, 135, 165, 195, 210, 225, 240, 255, 262, 269, 275, 280 ] week_list = [6, 10, 14, 18, 22, 26, 28, 30, 32, 34, 36, 37, 38, 39] for day in day_list: checkup = lmp_date + timedelta(days=day) checkup = str(checkup.day) + "/" + str(checkup.month) + "/" + str( checkup.year) checkup_dates.append(checkup) t.init() for index, i in enumerate(checkup_dates): t.url("https://sangam-test-website.herokuapp.com/") util.wait_for_pageload('//button[@id="btnsubmit"]') t.click('//input[@class="form-control"]') t.type('//input[@name="name"]', patient_name) t.click('//input[@id="email"]') t.type('//input[@name="email"]', email) symptoms = "Pregnancy checkup after week " + str(week_list[index]) t.type('//textarea', symptoms) t.click('//input[@id="txtHandNo"]') t.type('//input[@name="txtHandNo"]', phone_number) t.click('//div[@class="filter-option-inner-inner"]') t.click('//a[@role= "option"][.=' + str(hour) + ']') t.click('//select[@id="ddlMin"]') t.click('//a[@role= "option"][.=' + str(minute) + ']') t.click('//input[@name="txtDateTimePicker"]') t.type('//input[@name="txtDateTimePicker"]', i) t.click('//select[@id="txtSpecificDoc"]') t.click('//a[@role= "option"][.="' + str(doctor_name) + '"]') t.click('//button[@id="btnsubmit"]') t.close() request_url = "https://sangam-test-website.herokuapp.com/get_future_appointments?email=" + str( email) future_appointments = requests.get(request_url) book_calendar_slot(future_appointments.json()['data'], sub_id)
def _jd_na_row(email):
    # Placeholder row returned when nothing could be scraped for this search.
    return {
        "email": email,
        "name": "NA",
        "price": "NA",
        "img": "NA",
        "Company": "JD",
        "link": "NA"
    }


def _jd_scrape_results(email, details):
    """Scrape products from the current JD results page into `details`.

    Appends one dict per product (email, name, price, img, Company, link),
    or a single NA row when the result list is empty.

    NOTE(review): range(1, min(count, 4)) starts at XPath index 1 and stops
    before min(count, 4), so at most 3 items are read and, when count < 4,
    the last listed item is skipped — kept as-is to preserve behavior;
    confirm whether range(1, min(count, 3) + 1) was intended.
    """
    count = t.count(
        '//ul[@id="productListMain"]//li[@class="productListItem "]')
    t.wait(3)
    if count != 0:
        for i in range(1, min(count, 4)):
            price = t.read(f'(//span[@class="pri"])[{i}]')
            name = t.read(f'(//span[@class="itemTitle"])[{i}]')
            img = t.read(f'(//a[@class="itemImage"]/picture/img/@srcset)[{i}]')
            link = "https://www.jdsports.com.sg" + t.read(
                f'(//span[@class = "itemTitle"])[{i}]/a/@href')
            details.append({
                "email": email,
                "name": name,
                "price": price,
                "img": img,
                "Company": "JD",
                "link": link
            })
    else:
        details.append(_jd_na_row(email))


def get_shoe(shoe_name, g, email):
    """
    Get shoe details from jdsports.com.sg

    :param shoe_name: name of the shoe to search for
    :param g: gender of the subscriber (' men', ' women', or anything else
              for no gender filter — note the leading space)
    :param email: email id of the subscriber
    :return: details, list of shoe details (always at least one row; an NA
             row is used when nothing was found).
    """
    details = []
    t.init(visual_automation=True)
    t.url('https://www.jdsports.com.sg/')
    t.wait(5)
    # Type the query into the search box; '[enter]' submits it.
    final_command = shoe_name + " shoes" + '[enter]'
    t.keyboard('[esc]')  # dismiss any popup overlay first
    t.type('//input[@id = "srchInput"]', final_command)
    t.wait(3)
    # Apply the gender category filter when present, then scrape the listing.
    # (Scraping the three branches previously duplicated the same loop; it is
    # factored into _jd_scrape_results.)
    if g == ' men':
        if t.read(
                '(//a[@data-e2e="plp-filterMenu-catItem"]/span)[contains(.,"Men")]'
        ):
            t.click('(//a[@data-e2e="plp-filterMenu-catItem"]/span)[1]')
        _jd_scrape_results(email, details)
    elif g == ' women':
        if t.read(
                '(//a[@data-e2e="plp-filterMenu-catItem"]/span)[contains(.,"Women")]'
        ):
            t.click(
                '(//a[@data-e2e="plp-filterMenu-catItem"]/span)[.="Women"]')
        _jd_scrape_results(email, details)
    else:
        _jd_scrape_results(email, details)
    # Guarantee a non-empty result for downstream consumers.
    if len(details) == 0:
        details.append(_jd_na_row(email))
    return details
def gethistorylist(input):  # NOTE(review): parameter shadows the builtin `input`; renaming would be safer but may break keyword callers
    """Scrape one year's worth of bank-product history from jrj.com.cn.

    Filters the listing to issue dates from <year>-08-01 (originally noted as
    "try ~10 days' worth" during development), sorts ascending by issue date,
    and pages forward collecting 序号 / 综合评级 / url for every row dated on or
    before <year>-12-31. Each visited page's raw table is dumped to
    '<year>_<page>history_data.csv' and the collected columns to '<year>.csv'.

    Args:
        input: the year to fetch (int or str).

    Returns:
        int: the highest page number that was scraped (max_page).
    """
    input = str(input)
    date_start = input + '-08-01'  # start of the window for that year
    date_end = input + '-12-31'  # end of the window for that year
    # Initialize the browser session
    t.init()
    # Open the product listing page
    t.url("http://bank.jrj.com.cn/bankpro/data.shtml?type=1")
    # Wait 5 seconds for the page to load
    t.wait(5)
    # Hover, then expand the condensed filter options
    t.hover(element_identifier='//*[@id="fxr"]')
    t.click(element_identifier='//*[@id="zksq"]')
    # Move to the issue-date filter, click the text box, type the start date
    t.hover(element_identifier='//*[@id="fxr"]')
    t.click(element_identifier='//*[@id="fxr"]')
    t.type(element_identifier='//*[@id="fxr"]', text_to_type=date_start)
    # Click again so the date picker does not cover the search button below
    t.click(element_identifier='//*[@id="fxr"]')
    t.hover(element_identifier='//*[@class="ipf01"]')
    t.click(element_identifier='//*[@class="ipf01"]')
    # Set the page size to 50 products per page:
    t.hover(element_identifier='//*[@data-pagesize="50"]')
    t.click(element_identifier='//*[@data-pagesize="50"]')
    # Sort ascending by issue date (equivalent to reading the list "in reverse")
    t.hover(element_identifier='//*[@data-sort = "sell_org_date"]')
    t.click(element_identifier='//*[@data-sort = "sell_org_date"]')
    # Loop state, used while the "next page" control is not disabled
    page_curr = 1  # current page index
    value_dict = {}  # collected data
    max_page = 1  # highest page number visited
    # Column names (runtime CSV headers — kept in Chinese on purpose)
    name_list = ['序号', '综合评级', 'url']
    for col_name in name_list:
        value_dict.setdefault(col_name, [])  # initialize empty dataset
    # stop_flag = True means we have passed the end of the target year
    stop_flag = False
    # Loop while the current page is not the last page, or there is only one
    while (t.read(element_identifier=
                  '//div[@id = "pagefoot"]//a[@class = "cur pf-disabled"]') ==
           str(page_curr)) or (page_curr == 1):
        if stop_flag == True:  # no data left for this year — stop paging
            break
        max_page = page_curr
        # Row count on this page (+1 because the loop below starts at 1)
        count_values = int(
            t.count(element_identifier='//tbody[@id = "content"]//tr')) + 1
        # Dump the whole visible table for this page (titles and hrefs only)
        filename = str(input) + str("_") + str(page_curr) + "history_data.csv"
        t.wait(1)  # wait 1 second in case the page mis-loaded
        t.table(
            element_identifier='//div[@class = "table-s1 tab-s2 w100"]//table',
            filename_to_save=filename)
        for i in range(1, count_values):
            # Keep every row issued on or before 12-31 of the target year.
            # NOTE(review): lexicographic str comparison; safe only because
            # the dates are zero-padded 'YYYY-MM-DD'.
            if str(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']//td[@class = "px"]')) <= date_end:
                # Product serial number (primary key for later joins)
                value_dict[name_list[0]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']/td[2]'))
                # composite rating
                value_dict[name_list[1]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']//td[12]//i/@title'))
                # detail-page url
                value_dict[name_list[2]].append(
                    t.read(element_identifier='//tbody[@id = "content"]//tr[' +
                           str(i) + ']//a/@href'))
            else:
                # Past the end of the target year: set the flag and stop
                stop_flag = True
                break
        # Turn the page: simulate hover + click on the next page link
        page_curr += 1
        t.hover(element_identifier='//*[@href="' + str(page_curr) + '"]')
        t.click(element_identifier='//*[@href="' + str(page_curr) + '"]')
    # Close the TagUI session
    t.close()
    # Output file is named "<year>.csv"
    hist_data = pd.DataFrame(value_dict)
    hist_data.to_csv(input + ".csv", encoding='UTF-8', index=False)
    return max_page
def rpa_process(to_date, preferred_time, from_date, phone_number, token): t.init() t.url("https://sangam-test-website.herokuapp.com/change_input") util.wait_for_pageload('//button[@id="btnsubmit"]') t.click('//input[@id="txtHandNo"]') t.type('//input[@name="txtHandNo"]', phone_number) t.click('//button[@id="btnsubmit"]') util.wait_for_pageload('//button[@id="btnsubmit"]') from_date_obj = from_date from_date = from_date.strftime("%d/%m/%Y") t.click('//label[contains(.,"' + str(from_date) + '")]') to_date_obj = to_date hour = to_date.hour minute = to_date.minute to_date = to_date.strftime("%d/%m/%Y") t.click('//input[@name="txtDateTimePicker"]') t.type('//input[@name="txtDateTimePicker"]', to_date) t.click('//div[@class="filter-option-inner-inner"]') t.click('//a[@role= "option"][.=' + str(hour) + ']') t.click('//select[@id="ddlMin"]') t.click('//a[@role= "option"][.=' + str(minute) + ']') t.click('//button[@id="btnsubmit"]') t.close() change_appointment_slot(from_date_obj, to_date_obj, token)
import tagui as t import datetime import pandas as pd #初始化页面 t.init() #输入url进入 t.url("http://bank.jrj.com.cn/bankpro/data.shtml?type=1") #直接开始搜索,不需要任何筛选条件 t.click(element_identifier='//*[@id="fxr"]') t.hover(element_identifier='//*[@class="ipf01"]') t.click(element_identifier='//*[@class="ipf01"]') #把展示的尺寸设置为50个产品每页: t.hover(element_identifier='//*[@data-pagesize="50"]') t.click(element_identifier='//*[@data-pagesize="50"]') #点击以发行日升序排行,等价于"倒过来取",这样发行日为空的会在最前面 t.hover(element_identifier='//*[@data-sort = "sell_org_date"]') t.click(element_identifier='//*[@data-sort = "sell_org_date"]') #当下一页没有被disable的时候,有以下超参数 page_curr = 1 #当前页面index value_dict = {} #存放data #存放列名 name_list = ['序号', '产品名称', '发行银行', '委托货币', '发行日', '停售日', '管理期(天)', '预期收益率', '到期收益率', '与同期储蓄比', '综合评级','url'] for col_name in name_list: value_dict.setdefault(col_name, []) #初始化空数据集