def job_func(username, password):
    """Log in to the SUES web VPN and submit the daily health report form.

    Fills the login form, opens the health-report page (which spawns a second
    browser window), enters a random plausible body temperature and submits.

    :param username: VPN account name.
    :param password: VPN account password.
    :return: text of the result dialog shown after submission.
    """
    browser = Browser("chrome")
    browser.visit('https://web-vpn.sues.edu.cn/')
    browser.find_by_id('username').fill(username)
    browser.find_by_id('password').fill(password)
    browser.find_by_id('passbutton').click()
    print(datetime.datetime.now())
    print("登陆成功")
    time.sleep(10)
    # Open the health-report page (link text is Chinese for "health information report").
    browser.find_by_text('健康信息填报').click()
    time.sleep(10)
    # The report form opens in a second window; switch focus to it.
    browser.windows.current = browser.windows[1]
    time.sleep(10)
    tep = random.uniform(36.1, 36.7)
    tep = format(tep, '.1f')  # random normal-range temperature, one decimal place
    browser.find_by_xpath(
        '//*[@id="form"]/div[18]/div[1]/div/div[2]/div/div/input').fill(tep)  # temperature input box
    time.sleep(5)
    browser.find_by_id('post').click()  # submit button
    time.sleep(5)
    # Grab the text of the layui popup that reports success/failure.
    result = browser.find_by_xpath("//*[@id=\"layui-layer1\"]/div[2]").text
    browser.quit()
    return result
class BaseWebTestCase(LiveServerTestCase):
    """
    Abstract base class for browser-driven web tests.

    Starts a visible (non-headless) Chrome via splinter in setUp and quits it
    in tearDown. Subclasses use the _login/_visit/_go_home helpers.
    """

    # Credentials used by _login; redacted placeholders in this copy.
    username = "******"
    password = "******"
    # Fixed delay (seconds) applied after each page visit.
    wait_seconds = 3.0

    def setUp(self):
        """Launch Chrome with a fixed window size and generous timeouts."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.add_argument("--start-maximized")
        chrome_options.add_argument("--no-sandbox")
        self.browser = Browser("chrome", headless=False, wait_time=30,
                               options=chrome_options)
        self.browser.driver.set_page_load_timeout(240)
        super(BaseWebTestCase, self).setUp()

    def tearDown(self):
        """Quit the browser before normal LiveServerTestCase teardown."""
        self.browser.quit()
        super(BaseWebTestCase, self).tearDown()

    def _wait(self):
        """Sleep for the configured settle time."""
        time.sleep(self.wait_seconds)

    def _login(self):
        """Sign in through the login form and sanity-check the result."""
        self._visit("")
        self.browser.fill("username", self.username)
        self.browser.fill("password", self.password)
        self.browser.find_by_text("Sign in").first.click()
        assert self.browser.is_text_present(
            "Admin")  # checks that the nav is visible
        assert not self.browser.is_text_present("Sign in")

    def _go_home(self):
        """Navigate to the site root."""
        self._visit("/")

    def _setup_confirm(self):
        """
        First part of work-around to let phantomjs accept confirmation dialogs
        http://stackoverflow.com/questions/19903146/confirm-alert-window-in-phantom-js
        """
        js_confirm = "window.confirm = function() { return true }"
        self.browser.execute_script(js_confirm)

    def _accept_confirm(self):
        """
        Second part of work-around to let phantomjs accept confirmation dialogs
        MUST call self._setup_confirm() for this to work
        """
        self.browser.execute_script("return window.confirm")

    def _visit(self, path):
        """Visit *path* relative to the live server URL, then settle."""
        path = self.live_server_url + path
        self.browser.visit(path)
        self._wait()
def splinter(url):
    """Log in to an e-learning site and brute-force multiple-choice questions.

    Signs in as a student, opens a fixed course lesson, then for up to 100
    questions tries each of four choices, clicking "Check Response" and
    advancing via the "next" button when the answer was accepted.

    NOTE(review): reconstructed from flattened source — the exact nesting of
    the inner loops/try blocks is an assumption to be confirmed.

    :param url: login page URL of the e-learning platform.
    """
    browser = Browser()
    print('Opening browser\n')
    browser.visit(url)
    time.sleep(3)
    print('Selecting user type\n')
    browser.find_by_id('userTypeSelect').click()
    time.sleep(1)
    print('Click \"Student\"\n')
    browser.find_by_text('Student').click()
    time.sleep(1)
    print('Filling email and password\n')
    # Hard-coded credentials (redacted placeholders in this copy).
    browser.find_by_id('inputEmail').fill('*****@*****.**')
    browser.find_by_id('inputPassword').fill('8C51B7')
    print('Submitting form\n')
    browser.find_by_id('submitForm').click()
    time.sleep(3)
    print('Continue courses\n')
    browser.find_link_by_href('CourseLogin.aspx?courseid=2360111').click()
    time.sleep(3)
    print('Click the lesson')
    browser.find_link_by_href(
        '/mod/homepage/view.php?id=322&continue=true').click()
    # Try every question slot; radio ids follow "multichoice_<q>_<choice>".
    for i in range(1, 100):
        try:
            for h in range(4):
                choice = 'multichoice_' + str(i) + '_' + str(h + 1)
                print('Click choice: ' + choice)
                browser.find_by_id(choice).click()
                for k in range(4):
                    unclick = 'multichoice_' + str(i) + '_' + str(k + 1)
                    browser.find_by_id(unclick).click()
                time.sleep(1)
                browser.find_by_value('Check Response').click()
                time.sleep(1)
                try:
                    # If "next" is clickable the answer was correct.
                    browser.find_by_id('nextnavbutton').click()
                    print('Correct')
                except:
                    print('Not correct')
                    browser.find_by_id(unclick).click()
        except:
            # Fallback path: simply try each choice in turn.
            for j in range(4):
                choice = 'multichoice_' + str(i) + '_' + str(j + 1)
                print('Looking for id: ' + choice)
                browser.find_by_id(choice).click()
            time.sleep(1)
            browser.find_by_value('Check Response').click()
            time.sleep(1)
            try:
                browser.find_by_id('nextnavbutton').click()
            except:
                print('Wrong choice')
                print('Done or not a multi choice question')
        finally:
            # Wait out the per-question timer before moving on.
            print('Countdown started: 60sec')
            time.sleep(60)
            print('Trying to click \"next\"')
            browser.find_by_id('nextnavbutton').click()
class Submitter:
    """Browser bot that submits homework grades for a list of students.

    Logs in to the course site, navigates to the homework page, then for each
    entry in *submit_list* fills grade, comment and (for PDFs) an attachment.
    """

    def __init__(self, url, username, password, course_id, homework_id,
                 submit_list):
        """
        :param url: login page URL.
        :param username: account name for the site.
        :param password: account password.
        :param course_id: partial link text identifying the course.
        :param homework_id: partial link text identifying the homework.
        :param submit_list: iterable of (stu_id, grade, comment, ex_file).
        """
        self._callback = None  # per-task callback set via add_single_task_callback
        self._browser = Browser()
        self._url = url
        self._username = username
        self._password = password
        self._course_id = course_id
        self._homework_id = homework_id
        self._submit_list = submit_list

    def _login(self):
        """Fill the login form and submit."""
        self._browser.visit(self._url)
        self._browser.fill("i_user", self._username)
        self._browser.fill("i_pass", self._password)
        self._browser.find_by_id("loginButtonId").click()

    def _nvi2course(self):
        """Open the course page (opens a new window; close the old one)."""
        self._browser.find_link_by_partial_text(self._course_id).first.click()
        self._browser.windows.current.close()

    def _nvi2homework(self):
        """Navigate to the specific homework via the assignments section."""
        self._browser.find_link_by_partial_text("课程作业").first.click()
        self._browser.find_link_by_partial_text(
            self._homework_id).first.click()

    def _submit(self, stu_id, grade, comment, ex_file):
        """Submit one student's grade, comment and optional PDF attachment.

        Locates the student's row by id (third table column), opens the grade
        form, submits it and waits for the close ("关闭") confirmation.
        """
        xpath_str = '//tbody/tr[td[3]=' + stu_id + ']/td[last()]/a'
        self._browser.find_by_xpath(xpath_str).last.click()
        self._browser.fill('cj', grade)
        self._browser.fill('pynr', comment)
        # Only PDF attachments are uploaded; other extensions are skipped.
        if os.path.splitext(ex_file)[1] == '.pdf':
            self._browser.driver.find_element_by_name('fileupload').send_keys(
                ex_file)
        submit_btn_css = 'div[class="sub-back sub-back-3 absolute"] > input[class="btn"]'
        self._browser.find_by_css(submit_btn_css).first.click()
        # Busy-wait until the confirmation dialog shows its close button.
        while not self._browser.is_text_present('关闭', wait_time=1):
            pass
        self._browser.find_by_text('关闭').click()
        # Two "back"s: close the form and return to the homework list.
        self._browser.back()
        self._browser.back()

    def add_single_task_callback(self, callback):
        """Register a callable invoked with [stu_id, grade, comment, ex_file]
        after each successful submission."""
        self._callback = callback

    def start(self):
        """Run the full flow: login, navigate, submit every entry, quit."""
        self._login()
        self._nvi2course()
        self._nvi2homework()
        for stu_id, grade, comment, ex_file in self._submit_list:
            self._submit(stu_id, grade, comment, ex_file)
            self._callback([stu_id, grade, comment, ex_file])
        self._browser.quit()

    @staticmethod
    def clean():
        """Remove the geckodriver log left in the working directory."""
        work_dir = os.getcwd()
        os.remove(work_dir + "/geckodriver.log")
def browse(url):
    """Open *url* in PhantomJS, tick the two radio options and run the search.

    SSL certificate errors are ignored so self-signed hosts work.

    :return: the live Browser instance, left on the results page.
    """
    ssl_args = ['--ignore-ssl-errors=true', '--ssl-protocol=any']
    session = Browser("phantomjs", service_args=ssl_args)
    session.visit(url)
    for radio_id in ('RadioW03', 'RadioS03'):
        session.find_by_id(radio_id).first.click()
    session.find_by_text('\n Search\n ').first.click()
    return session
class BaseWebTestCase(LiveServerTestCase):
    """
    Abstract base class for browser-driven web tests.

    Starts a headless Chrome via splinter in setUp and quits it in tearDown.
    Subclasses use the _login/_visit/_go_home helpers.
    """

    # Credentials used by _login; redacted placeholders in this copy.
    username = '******'
    password = '******'
    # Fixed delay (seconds) applied after page visits/navigation.
    wait_seconds = 3.0

    def setUp(self):
        """Launch headless Chrome with a fixed window size."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.add_argument("--start-maximized")
        chrome_options.add_argument("--no-sandbox")
        self.browser = Browser('chrome', headless=True, wait_time=10,
                               options=chrome_options)
        super(BaseWebTestCase, self).setUp()

    def tearDown(self):
        """Quit the browser; tolerate an IndexError from server teardown."""
        self.browser.quit()
        try:
            super(BaseWebTestCase, self).tearDown()
        except IndexError:
            print("Ignoring IndexError in tearDown...")

    def _login(self):
        """Sign in through the login form and sanity-check the result."""
        self._visit("")
        self.browser.fill('username', self.username)
        self.browser.fill('password', self.password)
        self.browser.find_by_text('Sign in').first.click()
        assert self.browser.is_text_present('Home')
        assert not self.browser.is_text_present('Sign in')

    def _go_home(self):
        """Follow the "Home" nav link, then settle."""
        self.browser.click_link_by_text('Home')
        time.sleep(self.wait_seconds)

    def _setup_confirm(self):
        """
        First part of work-around to let phantomjs accept confirmation dialogs
        http://stackoverflow.com/questions/19903146/confirm-alert-window-in-phantom-js
        """
        js_confirm = 'window.confirm = function() { return true }'
        self.browser.execute_script(js_confirm)

    def _accept_confirm(self):
        """
        Second part of work-around to let phantomjs accept confirmation dialogs
        MUST call self._setup_confirm() for this to work
        """
        self.browser.execute_script('return window.confirm')

    def _visit(self, path):
        """Visit *path* relative to the live server URL, then settle."""
        path = self.live_server_url + path
        self.browser.visit(path)
        time.sleep(self.wait_seconds)
def fb_marketplace(login_name, login_pw, title, pic_path, description, price,
                   plz, street, company, phone):
    """Log in to Facebook and open the Marketplace page.

    Only ``login_name`` and ``login_pw`` are used here; the listing-related
    parameters (title, pic_path, description, price, plz, street, company,
    phone) are accepted but not consumed by this step.
    """
    fb_home = "https://www.facebook.com"
    session = Browser('chrome')
    session.driver.set_window_size(1200, 900)
    session.visit(fb_home)
    session.fill('email', login_name)
    session.fill('pass', login_pw)
    session.find_by_id("loginbutton").click()
    # Give the post-login page time to render before navigating.
    time.sleep(6)
    session.find_by_text('Marketplace').click()
def get_collection(collection_id):
    """Fetch the unit listing pages of one archive collection (zespół).

    Opens the collection page on szukajwarchiwach.gov.pl, switches the unit
    list to 200 items per page and collects the raw HTML of the unit table
    from every pagination page.

    :param collection_id: numeric/string id of the collection.
    :return: list of HTML strings, one per page (empty if no units).
    """
    url = 'https://www.szukajwarchiwach.gov.pl/zespol/-/zespol/{}'.format(
        collection_id)
    print(url)
    collections = []
    browser = Browser('chrome', headless=True)
    browser.visit(url)
    # Open the unit list and set pagination to 200 per page.
    link = browser.find_by_id('przejdzDoJednostek')
    if link.is_empty():
        print('brak jednostek; return')  # no units; bail out
        return collections
    link.click()
    if not browser.find_by_css(
            '.dropdown.pagination-items-per-page').is_empty():
        browser.find_by_css('.dropdown.pagination-items-per-page')[0].click()
        browser.find_by_text('200')[0].click()
    # Walk the pagination, grabbing each page's unit table HTML.
    while True:
        last_link = None
        if not browser.find_by_css('.pagination').is_empty():
            # Last anchor in the pagination bar is the "next" control.
            last_link = browser.find_by_css('.pagination')[0].find_by_tag(
                'a').last
        # Grab the unit table for this page.
        collections.append(browser.find_by_css('.jednostkaObiekty')[0].html)
        """ for tr in browser.find_by_css('.jednostkaObiekty')[0].find_by_tag('tr'): tds = tr.find_by_tag('td') # jesli to jest naglowej gdzie nie ma td to nara if len(tds) == 0: continue sygnatura = tds[0].text nazwa = tds[1].text daty = tds[2].text skany = tds[3].text href = tds[1].find_by_tag('a')._element.get_attribute('href') collections.append([sygnatura, nazwa, daty, skany, href]) """
        # A "javascript:;" href means the next-page control is disabled.
        if last_link is None or (
                'javascript:;' == last_link._element.get_attribute('href')):
            break
        print('next page')
        last_link.click()
    return collections
def get_archive(slug):
    """Return all collections (zespoły) of one archive, with their units.

    Opens the archive page, switches the collection list to 200 per page and
    walks every pagination page, parsing each table row and recursively
    fetching the collection's units via get_collection().

    :param slug: URL path fragment of the archive on szukajwarchiwach.gov.pl.
    :return: list of [signature, name, dates, scans, href, id, collections].
    """
    url = 'https://www.szukajwarchiwach.gov.pl{}'.format(slug)
    zespoly = []
    browser = Browser('chrome', headless=True)
    browser.visit(url)
    print(url)
    # Open the collection list ("Lista zespołów") and set 200 per page.
    browser.find_by_text('Lista zespołów')[0].click()
    browser.find_by_css('.dropdown.pagination-items-per-page')[0].click()
    browser.find_by_text('200')[0].click()
    # Walk the pagination pages.
    while True:
        # Last anchor in the pagination bar is the "next" control.
        last_link = browser.find_by_css('.pagination')[0].find_by_tag('a').last
        # Parse the results table row by row.
        for tr in browser.find_by_css('.lista-zespolow-wyniki')[0].find_by_tag(
                'tr'):
            # Skip the expandable detail rows.
            if tr._element.get_attribute('class') == 'entityDetails':
                continue
            tds = tr.find_by_tag('td')
            # Header rows have no <td> cells.
            if len(tds) == 0:
                continue
            sygnatura = tds[0].text  # signature
            nazwa = tds[1].text      # name
            daty = tds[2].text       # dates
            skany = tds[3].text      # scan count
            href = tds[1].find_by_tag('a')._element.get_attribute('href')
            # Collection id is the last URL path segment.
            zespol_id = href.split('/').pop()
            collections = get_collection(zespol_id)
            zespoly.append(
                [sygnatura, nazwa, daty, skany, href, zespol_id, collections])
        # A "javascript:;" href means the next-page control is disabled.
        if 'javascript:;' == last_link._element.get_attribute('href'):
            break
        print('next page')
        last_link.click()
    return zespoly
class AuthedSplinterTestCase(ChannelsLiveServerTestCase):
    """Live-server test case that starts each test signed in as a fresh user."""

    def setUp(self):
        """Launch headless Chrome and log in a factory-created user."""
        self.browser = Browser('chrome', headless=True)
        account = AuthUserFactory()
        login_url = f'{self.live_server_url}{reverse("users:login")}'
        self.browser.visit(login_url)
        self.browser.fill('username', account.username)
        self.browser.fill('password', "password")
        self.browser.find_by_text("Login")[0].click()

    def tearDown(self):
        """Shut the browser down after each test."""
        self.browser.quit()
def get_access_code(self, client_id):
    """Walk the TD Ameritrade OAuth login flow and capture the access code.

    Builds the /auth URL, logs in, answers the 2FA security question, then
    extracts the ``code=`` query parameter from the redirect URL.

    NOTE(review): ``driver`` is a set literal passed positionally to
    Browser() — looks like it was meant to be an executable-path mapping;
    confirm against the splinter version in use.
    NOTE(review): ``user_id`` and ``password`` are not defined in this
    method — presumably module-level or instance state; verify.

    :param client_id: OAuth client id (consumer key without the suffix).
    :return: decoded access code string (also stored on self.access_code).
    """
    driver = {'chromedriver.exe'}
    browser = Browser('chrome', driver, headless=False)
    # define components of url
    method = 'GET'
    url = 'https://auth.tdameritrade.com/auth?'
    client_code = client_id + '@AMER.OAUTHAP'
    payload = {
        'response_type': 'code',
        'redirect_uri': 'http://32.211.92.157',
        'client_id': client_code
    }
    # build url (requests does the query-string encoding)
    my_url = requests.Request(method, url, params=payload).prepare()
    my_url = my_url.url
    browser.visit(my_url)
    # login
    payload = {'username0': user_id, 'password': password}
    browser.find_by_id('username0').first.fill(payload['username0'])
    time.sleep(1)
    browser.find_by_id('password').first.fill(payload['password'])
    time.sleep(1)
    browser.find_by_id('accept').first.click()
    time.sleep(1)
    # Switch 2FA from SMS to a security question.
    browser.find_by_text("Can't get the text message?").first.click()
    browser.find_by_value('Answer a security question').first.click()
    # answer security questions (site rotates among these four)
    if browser.is_text_present('What was the name of your high school?'):
        browser.find_by_id('secretquestion0').first.fill(
            'East Lyme High School')
    elif browser.is_text_present('What was your high school mascot?'):
        browser.find_by_id('secretquestion0').first.fill('The Vikings')
    elif browser.is_text_present('What was the name of your first pet?'):
        browser.find_by_id('secretquestion0').first.fill('Cody')
    elif browser.is_text_present(
            'What was the name of the town your grandmother lived in? (Enter full name of town only.)'
    ):
        browser.find_by_id('secretquestion0').first.fill('Scranton')
    browser.find_by_id('accept').first.click()
    time.sleep(1)
    browser.find_by_id('accept').first.click()
    # parse url: the redirect carries the access code as ?code=...
    time.sleep(1)
    new_url = browser.url
    access_code = urllib.parse.unquote(new_url.split('code=')[1])
    # close browser
    browser.quit()
    self.access_code = access_code
    print('access_code:', access_code)
    return access_code
def login12306():
    """Semi-automated 12306.cn ticket query: inject saved-search cookies and
    poll the query button until a bookable ("预订") result appears.

    Python 2 code (uses ``raw_input``). The user performs the initial search
    manually; the cookies then pin date and stations across reloads.
    """
    b = Browser(driver_name="chrome")
    url = "https://kyfw.12306.cn/otn/leftTicket/init"
    b.visit(url)
    # Prompt (Chinese): enter destination/date on the page, click query,
    # then press any key to continue.
    raw_input("请直接在页面输入目的地信息和出发时间,点击查询后,按任意键继续: ")
    # Pin the search: date, from-station (Beijing), to-station (Shanghai);
    # station values are the site's %u-escaped encoding.
    b.cookies.add({"_jc_save_fromDate": "2016-02-09"})
    b.cookies.add({"_jc_save_fromStation": "%u5317%u4EAC%2CBJP"})
    b.cookies.add({"_jc_save_toStation": "%u4E0A%u6D77%2CSHH"})
    b.reload()
    # Keep clicking "查询" (query) every 3s until "预订" (book) shows up.
    while (b.is_element_not_present_by_text(u"预订")):
        b.find_by_text(u"查询").click()
        time.sleep(3)
    b.find_by_text(u"预订")[0].click()
    exit()
def search_advs(searches):
    """Run each saved search on mobile.bg and collect result-ad hrefs.

    For every search dict, fills the category/make/model dropdowns, opens the
    detailed-search page, applies the optional filters and submits, then
    gathers the advert links via get_advs_hrefs().

    NOTE(review): relies on module-level ``sleep_time``, ``sortirane`` and
    helper ``get_advs_hrefs`` — defined elsewhere in the file.

    :param searches: iterable of dicts with keys 'kategoria', 'marka',
        'model', 'dvigatel', 'godina', 'region', 'grad', 'opcii',
        'search_name'.
    :return: dict mapping search_name -> list of advert hrefs.
    """
    browser = Browser()
    advs_dict = {}
    for s in searches:
        browser.visit('http://mobile.bg')
        # Best-effort close of the "Zaplata" banner if it is present.
        try:
            browser.find_by_id('ZaplataFooterClose').first.click()
        except Exception as e:
            pass
        browser.find_option_by_text(s['kategoria']).first.click()
        browser.find_option_by_text(s['marka']).first.click()
        if s['model'] != '':
            browser.find_option_by_text(s['model']).first.click()
        # Go to the detailed-search page ("Подробно търсене").
        browser.find_by_text('Подробно търсене').first.click()
        time.sleep(sleep_time)
        # Close the Zaplata banner again on the new page (best-effort).
        try:
            browser.find_by_id('ZaplataFooterClose').first.click()
            print(1)
        except Exception as e:
            pass
            #print ('could not close banner:', e)
        browser.find_option_by_text(sortirane).first.click()
        if s['dvigatel'] != '':
            browser.find_option_by_text(s['dvigatel']).first.click()
        # Year range filters; 0 means "unbounded" on that side.
        if s['godina'][0] != 0:
            browser.find_option_by_text('от ' + str(s['godina'][0]) +
                                        ' г.').first.click()
        if s['godina'][1] != 0:
            browser.find_option_by_text('до ' + str(s['godina'][1]) +
                                        ' г.').first.click()
        if s['region'] != '':
            browser.find_option_by_text(s['region']).first.click()
        if s['grad'] != '':
            browser.find_option_by_text(s['grad']).first.click()
        for o in s['opcii']:
            browser.find_by_text(o).first.click()
        # TODO: add option for private ads / all ads
        # Submit the search ("Т Ъ Р С И" = SEARCH).
        browser.find_by_value('Т Ъ Р С И').first.click()
        advs = get_advs_hrefs(browser)
        #for h in advs:
        #    print(h)
        print('total', len(advs))
        advs_dict[s['search_name']] = advs
    browser.quit()
    return advs_dict
def login(user, pwd, dir):
    """Log in to the Neusoft attendance site and clock in ("打卡").

    Loops until both login and clock-in succeed: screenshots the login panel,
    OCRs the captcha (via imgOcr), submits the form, then clicks the clock-in
    button and verifies the recorded time matches the current minute.

    :param user: account name.
    :param pwd: account password.
    :param dir: working directory holding driver\\chromedriver.exe; a temp
        subdirectory is created for captcha screenshots.
    """
    DiverUrl = dir + "\\driver\\chromedriver.exe"
    executable_path = {'executable_path': DiverUrl}
    if not os.path.exists(dir + "\\temp"):
        os.mkdir(dir + "\\temp")
    imagURL = dir + "\\temp\\ocr.jpg"
    loginFlag = True
    dailyFlag = True
    browser = Browser("chrome", **executable_path)
    while (loginFlag or dailyFlag):  # loop until clock-in succeeds
        try:
            alert = browser.get_alert()  # dismiss proxy-login prompt if shown
        except Exception:
            pass
        else:
            alert.dismiss()
        try:
            if loginFlag:  # login and clock-in are tracked separately
                browser.visit("http://kq.neusoft.com")
                # Renamed from "input" to avoid shadowing the builtin.
                inputs = browser.find_by_tag("input")
                inputs[4].fill(user)
                inputs[5].fill(pwd)
                # Screenshot the whole form (cropping only the captcha
                # drifts and breaks OCR); returns the screenshot path.
                screenshot_path = browser.find_by_id(
                    "tbLogonPanel").screenshot(imagURL, full=True)
                ocrResult = imgOcr(screenshot_path)
                os.remove(screenshot_path)  # delete the captcha screenshot
                # Fix: was "ocrResult is 0" — identity comparison with an
                # int literal; equality is what is meant here.
                if ocrResult == 0:  # OCR failed; retry to avoid a bad submit
                    logOut("图片路径:" + screenshot_path + ",识别失败")
                    continue
                print(time.strftime("%D %H:%M:%S", time.localtime()),
                      "识别验证码为:", ocrResult)
                inputs[6].fill(ocrResult)  # fill in the OCRed captcha
                browser.find_by_id("loginButton").click()
                if browser.is_text_present('打卡'):
                    loginFlag = False
                    logOut("登陆成功")
            if dailyFlag:
                timeNow = time.strftime("%H:%M", time.localtime())
                browser.find_by_text("打卡").first.click()
                # Clock-in succeeded if the last row's last cell shows the
                # current HH:MM.
                if browser.find_by_tag("tr").last.find_by_tag(
                        "td").last.text[0:5] == timeNow:
                    dailyFlag = False
                    logOut("打卡成功")
        except Exception as identify:
            logOut("打卡失败:" + str(identify))  # log and retry
    browser.quit()
def scrape():
    """Scrape Mars news, featured image, facts table and hemisphere images.

    Relies on a module-level ``executable_path`` mapping for chromedriver.

    :return: dict with keys 'title', 'paragraph', 'image', 'table',
        'hemispheres' (list of {'title', 'url'} dicts).
    """
    browser = Browser('chrome', **executable_path, headless=False)
    data = {}
    # Latest news headline and teaser paragraph.
    browser.visit('https://redplanetscience.com/')
    data['title'] = browser.find_by_css('div.content_title').text
    data['paragraph'] = browser.find_by_css('div.article_teaser_body').text
    # Featured full-size image.
    browser.visit('https://spaceimages-mars.com')
    browser.find_link_by_partial_text('FULL IMAGE').click()
    data['image'] = browser.find_by_css('img.fancybox-image')['src']
    # Mars facts table (second table on the page), re-rendered as HTML.
    data['table'] = pd.read_html('https://galaxyfacts-mars.com')[1].to_html()
    # The four hemisphere titles and sample-image URLs.
    browser.visit('https://marshemispheres.com/')
    hemispheres = []
    for i in range(4):
        hemisphere = {}
        hemisphere['title'] = browser.find_by_css('a.itemLink h3')[i].text
        browser.find_by_css('a.itemLink h3')[i].click()
        hemisphere['url'] = browser.find_by_text('Sample')['href']
        browser.back()
        hemispheres.append(hemisphere)
    browser.quit()
    data['hemispheres'] = hemispheres
    return data
def for_mars():
    """Scrape the four Mars hemisphere titles and full-size image URLs.

    Visits the USGS astrogeology search results, opens each hemisphere's
    detail page, and reads the title plus the "Sample" image link.

    :return: list of four dicts with keys 'title' and 'image_url'.
    """
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)
    # Fix: removed a stray no-op "browser" expression statement here.
    urlhemisphere = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser.visit(urlhemisphere)
    hemisphere_list = []
    for i in range(4):  # the results page lists exactly four hemispheres
        # Re-query each pass: browser.back() invalidates old element handles.
        h3 = browser.find_by_css('a.product-item h3')
        hemisphere_dict = {}
        h3[i].click()
        html = browser.html
        soup = BeautifulSoup(html, 'html.parser')
        Sample = browser.find_by_text('Sample')  # link to the full-size image
        title = soup.find('h2').text.strip()
        hemisphere_dict['title'] = title
        hemisphere_dict['image_url'] = Sample['href']
        hemisphere_list.append(hemisphere_dict)
        browser.back()
    print(hemisphere_list)
    browser.quit()
    return hemisphere_list
def scrape_hemisphere_enhanced_images():
    """Scrape every Mars hemisphere title and full-size image URL.

    Counts the thumbnails on the USGS search page, then opens each detail
    page to read the "Sample" image link.

    :return: list of dicts with keys 'title' and 'img_url'.
    """
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=True)
    base_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    all_hemispheres = []
    browser.visit(base_url)
    # One thumbnail per hemisphere on the results page.
    num_hemispheres = len(browser.find_by_css(".thumb"))
    for hemisphere_num in range(num_hemispheres):
        # Titles are "<name> Hemisphere Enhanced"; strip the suffix.
        curr_title = browser.find_by_tag("h3")[hemisphere_num].html.replace(
            " Enhanced", "")
        browser.find_by_css(".thumb")[hemisphere_num].click()
        curr_img_url = browser.find_by_text("Sample").first["href"]
        # print(curr_img_url)
        browser.back()
        all_hemispheres.append({"title": curr_title, "img_url": curr_img_url})
    # Close any stray windows the site may have opened.
    browser.windows[0].close_others()
    # print(all_hemispheres)
    browser.quit()
    return all_hemispheres
def GetDate(start_date, end_date):
    """Scrape Azul airline round-trip flights (Recife -> Belo Horizonte)
    for the given dates, in both cash prices and miles.

    Python 2 code (print statements, dict.has_key). First scrapes the
    cash-fare tables, then toggles to the points fare type and merges the
    miles figure into each flight's record keyed by GetKey().

    NOTE(review): GetFlightInfo/GetKey are helpers defined elsewhere in the
    file; their record format is assumed to be a dict with a "miles" key.

    :param start_date: outbound date string typed into the departure field.
    :param end_date: return date string typed into the arrival field.
    :return: dict mapping flight key -> flight info dict.
    """
    browser = Browser()
    f_map = {}
    browser.visit("https://www.voeazul.com.br/")
    #print browser.is_text_present("Comprar")
    #browser.fill('ticket-origin1', 'recife')
    # Type the origin and accept the autocomplete suggestion with ENTER.
    browser.find_by_id('ticket-origin1').type("recife")
    time.sleep(1)
    active_web_element = browser.driver.switch_to_active_element()
    active_web_element.send_keys(Keys.ENTER)
    browser.find_by_id('ticket-destination1').type("belo")
    time.sleep(1)
    active_web_element = browser.driver.switch_to_active_element()
    active_web_element.send_keys(Keys.ENTER)
    time.sleep(1)
    browser.find_by_id('ticket-departure1').type(start_date)
    browser.find_by_id('ticket-arrival1').type(end_date)
    # "Buscar passagens" = search flights.
    browser.find_by_text("Buscar passagens")[0].click()
    # First pass: cash prices from both depart and return tables.
    for f in (browser.find_by_id("tbl-depart-flights").find_by_tag("tr") +
              browser.find_by_id("tbl-return-flights").find_by_tag("tr")):
        info = GetFlightInfo(f, False)
        if info != None:
            print info
            key = GetKey(info)
            if f_map.has_key(key):
                print "ID is not unique as was expected!"
            f_map[key] = info
    # Toggle to the points (miles) fare view and wait for it to reload.
    browser.find_by_id("availability_pointsFareType").click()
    time.sleep(5)
    # Second pass: merge miles into the records collected above.
    for f in (browser.find_by_id("tbl-depart-flights").find_by_tag("tr") +
              browser.find_by_id("tbl-return-flights").find_by_tag("tr")):
        info = GetFlightInfo(f, True)
        if info != None:
            print info
            key = GetKey(info)
            if not f_map.has_key(key):
                print "ID for miles not present in price request!"
                continue
            f_map[key]["miles"] = info["miles"]
    browser.quit()
    return f_map
def test_login(self):
    """A user can sign in through the login form (ui host, port 8080)."""
    with pyvirtualdisplay.Display():
        session = Browser()
        session.visit("http://ui:8080/accounts/login/")
        for field, value in (("login", "testuser"), ("password", "password")):
            session.fill(field, value)
        session.find_by_css(".btn-primary").click()
        banner = session.find_by_text("Successfully signed in as testuser.")
        self.assertTrue(banner)
def get_token(username, password):
    """Log in to the UQ timetable planner and return the remember_token cookie.

    Each stage of the login flow (landing button, credential fields, LOGIN
    button, cookie) is polled once a second for up to 10 seconds.

    :param username: UQ account name.
    :param password: UQ account password.
    :return: the remember_token cookie value, or None if any stage times out.
    """
    from splinter import Browser
    import time
    browser = Browser()
    try:
        browser.visit("https://timetableplanner.app.uq.edu.au/")
        # Wait for the landing-page sign-in button.
        count = 0
        while browser.is_element_not_present_by_text(
                "Sign in and get started!") and count < 10:
            time.sleep(1)
            count += 1
        if browser.is_element_present_by_text("Sign in and get started!"):
            browser.find_by_text("Sign in and get started!").click()
        else:
            return None
        # Wait for the credential fields and fill them.
        count = 0
        while browser.is_element_not_present_by_id("username") and count < 10:
            time.sleep(1)
            count += 1
        if browser.is_element_present_by_id(
                "username") and browser.is_element_present_by_id("password"):
            browser.fill('username', username)
            browser.fill('password', password)
        else:
            return None
        # Wait for the LOGIN button and submit.
        count = 0
        while browser.is_element_not_present_by_value("LOGIN") and count < 10:
            time.sleep(1)
            count += 1
        if browser.is_element_present_by_value("LOGIN"):
            browser.find_by_value("LOGIN").click()
        else:
            return None
        # Wait for the session cookie to appear.
        count = 0
        while "remember_token" not in browser.cookies and count < 10:
            time.sleep(1)
            count += 1
        if "remember_token" in browser.cookies:
            return browser.cookies['remember_token']
        else:
            return None
    finally:
        try:
            browser.quit()
        # Fix: was a bare "except:", which also swallowed SystemExit and
        # KeyboardInterrupt; narrow to Exception.
        except Exception:
            print("Unable to close browser. Do it yourself!")
def test_login(self):
    """A user can sign in through the login form (ui host, default port)."""
    with pyvirtualdisplay.Display():
        session = Browser()
        session.visit("http://ui/accounts/login/")
        for field, value in (("login", "testuser"), ("password", "password")):
            session.fill(field, value)
        session.find_by_css(".btn-primary").click()
        banner = session.find_by_text("Successfully signed in as testuser.")
        self.assertTrue(banner)
def browser_start():
    """Log in to the NTU facilities booking site and open the booking page.

    Reads credentials and the target floor/date from module-level globals
    and leaves the logged-in ``browser`` global on the booking grid page.
    """
    global date, browser, usr_name, usr_pw, place_floor, url
    login_url = 'https://info2.ntu.edu.tw/facilities/Default.aspx'
    # Floor selects which placeSeq to book; the date is appended below.
    if place_floor == '1':
        url = 'https://info2.ntu.edu.tw/facilities/PlaceGrd.aspx?nFlag=0&placeSeq=2&dateLst='
    if place_floor == '3':
        url = 'https://info2.ntu.edu.tw/facilities/PlaceGrd.aspx?nFlag=0&placeSeq=1&dateLst='
    url = url + date
    browser = Browser('chrome', executable_path='./chromedriver.exe')
    browser.visit(login_url)
    # "學生登入" = student login.
    browser.find_by_text(u"學生登入").click()
    browser.find_by_name('user').fill(usr_name)
    browser.find_by_name('pass').fill(usr_pw)
    browser.find_by_name('Submit').click()
    browser.visit(url)
def scrape():
    """Collect Mars news, featured image, facts table and hemisphere images.

    Opens a fresh Chrome instance (via webdriver-manager) for each section
    and quits it when done.

    :return: dict with keys 'News title', 'News text', 'Image source',
        'Table code', 'Images list'.
    """
    # Dictionary for returning
    information = dict()
    # Getting news title and text
    executable_path = {'executable_path': ChromeDriverManager().install()}
    browser = Browser('chrome', **executable_path, headless=False)
    url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
    browser.visit(url)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")
    article = soup.find('div', class_="list_text")
    title = article.a.text
    paragraph = soup.find('div', class_="article_teaser_body").text
    information = {'News title': title, 'News text': paragraph}
    browser.quit()
    # Getting feature image (URL is embedded in the article's style attr)
    executable_path = {'executable_path': ChromeDriverManager().install()}
    browser = Browser('chrome', **executable_path, headless=False)
    url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(url)
    src = browser.find_by_tag('article')['style']
    src = src.replace('background-image: url("', '')
    src = src.replace('");', '')
    image_src = "https://www.jpl.nasa.gov" + src
    information.update({"Image source": image_src})
    browser.quit()
    # Getting table (first table on the facts page, re-rendered as HTML)
    url = "https://space-facts.com/mars/"
    tables = pd.read_html(url)
    mars_table = tables[0]
    mars_table = mars_table.rename(columns={0: "Parameter", 1: "Value"})
    html_mars_table = mars_table.to_html(index=False)
    information.update({"Table code": html_mars_table})
    # Getting hemispheres images: first gather the item links...
    executable_path = {'executable_path': ChromeDriverManager().install()}
    browser = Browser('chrome', **executable_path, headless=False)
    url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(url)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    hemisphere_img = soup.find_all('div', class_='item')
    browser.quit()
    images_list = list()
    hemisphere = dict()
    # ...then open each detail page in a fresh browser instance.
    for i in range(len(hemisphere_img)):
        executable_path = {'executable_path': ChromeDriverManager().install()}
        browser = Browser('chrome',
                          **executable_path, headless=False)
        url = "https://astrogeology.usgs.gov" + (hemisphere_img[i].a["href"])
        browser.visit(url)
        image = browser.find_by_text('Sample')
        image = image['href']
        title = browser.find_by_tag('h2')
        title = title.text
        hemisphere = {'title': title, "img_url": image}
        images_list.append(hemisphere)
        browser.quit()
    information.update({"Images list": images_list})
    return (information)
def make(b, c):
    """Drive the okzaijia admin site: create or update a repair order, then
    execute the dispatch task.

    :param b: 1 to create a brand-new order with random data; any other value
        updates the existing order identified by *c*.
    :param c: order number to look up when b != 1.
    """
    browser = Browser('chrome')
    url = 'http://admin2.okzaijia.com.cn/Account/login'
    browser.visit(url)
    # Hard-coded admin credentials.
    browser.find_by_id('UserName').fill('Tina')
    browser.find_by_id('Password').fill('13916099416')
    browser.find_by_id('LoginOn').click()
    browser.find_by_xpath(
        '/html/body/div[1]/div[1]/div/div[2]/div/div/ul/li/a').click()
    if b == 1:
        # "新增订单" = create new order; opens a second window.
        browser.find_link_by_text(u'新增订单').click()
        browser.windows.current = browser.windows[1]
        #print browser.windows.current
        # Fill the order form with random content/name/phone/source/area.
        textnew = browser.find_by_name('RepairContent')
        textnew.fill(random.randint(10000, 19999))
        a = ''.join([chr(random.randint(97, 122)) for _ in range(4)])
        browser.find_by_id('UserName').fill(a)
        browser.find_by_id('UserMobile').fill(
            random.randint(15138460867, 19000000000))
        browser.select('Source', random.randint(1, 10))
        browser.select('AreaId', random.randint(801, 819))
        browser.find_by_id('UserAddress').fill(random.randint(3000, 9999))
        browser.find_by_xpath('//*[@id="submit"]').click()
        time.sleep(2)
    else:
        # Look up the existing order by its number.
        browser.find_by_name('orderno').fill(c)
        browser.find_by_xpath('//*[@id="searchForm"]/div[7]/button').click()
        # "维修记录" = repair records.
        browser.find_by_text(u'维修记录').click()
        browser.find_by_xpath(
            "/html/body/div[1]/div[1]/div/div[2]/div[1]/a").click()
        browser.windows.current = browser.windows[1]
        # Fill a random repair content and remark, then submit.
        b = ''.join([chr(random.randint(97, 122)) for _ in range(5)])
        browser.find_by_name('RepairContent').fill(b)
        browser.find_by_name('Remark').fill(random.randint(20000, 29999))
        browser.find_by_id('submit').click()
        time.sleep(3)
    # Open "my tasks" (type 4, status 1) and execute the dispatch task.
    browser.visit(
        'http://admin2.okzaijia.com.cn/Task/MyTask?TaskType=4&Status=1')
    browser.windows.current = browser.windows[1]
    #print browser.windows.current
    browser.find_by_xpath('//*[@id="searchForm"]/div[3]/button').click()
    browser.find_by_xpath('//*[@id="pages"]/div/a[7]').click()
    # "执行任务" = execute task; opens a third window.
    browser.find_by_text(u'执行任务').last.click()
    time.sleep(2)
    browser.windows.current = browser.windows[2]
    # Pick the work crew that accepts the order (value 37).
    browser.find_by_value('37').click()
    #print browser.find_by_value('17').text
    browser.find_by_id('submit').click()
def test_admin_login(self):
    """The admin site renders and accepts the sfmadmin credentials."""
    with pyvirtualdisplay.Display():
        session = Browser()
        session.visit("http://ui/admin/")
        self.assertTrue(session.find_by_text("Django administration"))
        for field, value in (("username", "sfmadmin"), ("password", "password")):
            session.fill(field, value)
        session.find_by_value("Log in").click()
        self.assertTrue("Welcome" in session.html)
def test_admin_login(self):
    """The admin site (port 8080) renders and accepts the sfmadmin login."""
    with pyvirtualdisplay.Display():
        session = Browser()
        session.visit("http://ui:8080/admin/")
        self.assertTrue(session.find_by_text("Django administration"))
        for field, value in (("username", "sfmadmin"), ("password", "password")):
            session.fill(field, value)
        session.find_by_value("Log in").click()
        self.assertTrue("Welcome" in session.html)
def scrape_mars_hemispheres():
    """Scrape the four Mars hemisphere names and wide-image URLs.

    Reads the four h3 titles from the USGS search page, opens each detail
    page to grab the wide image's relative src, then maps name -> full URL.

    :return: dict mapping hemisphere name to absolute image URL.
    """
    executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
    browser = Browser('chrome', **executable_path, headless=True)
    hemis_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser.visit(hemis_url)
    hemis_html = browser.html
    hemis_soup = bs(hemis_html, 'html.parser')
    # The first four h3 headings are the hemisphere titles.
    hemis_names = []
    for n in range(0, 4):
        hemis = hemis_soup.find_all('h3')[n].text.strip()
        hemis_names.append(hemis)
    hemis_images = []
    for i in range(0, 4):
        # Click on hemisphere title to open its detail page.
        browser.find_by_tag('h3')[i].click()
        time.sleep(1)
        # NOTE(review): the original looked up the 'Open' control but never
        # clicked it; kept as-is as a settle step — confirm whether a
        # .click() was intended here.
        browser.find_by_text('Open')
        time.sleep(1)
        # Parse the detail page for the wide image's relative src.
        html_hemispheres = browser.html
        soup_hemispheres = bs(html_hemispheres, 'html.parser')
        img = soup_hemispheres.find_all('img', class_='wide-image')[0]['src']
        hemis_images.append(img)
        # Go back to the listing page for the next hemisphere.
        browser.visit(hemis_url)
        time.sleep(2)
    browser.quit()  # fix: original leaked the browser process
    # Fix: removed a dead "dict(zip(...))" assignment that was immediately
    # overwritten; build the final mapping in one place.
    root_url = 'https://astrogeology.usgs.gov'
    hemisphere_image_urls = {
        name: root_url + img
        for name, img in zip(hemis_names, hemis_images)
    }
    return hemisphere_image_urls
def scrape():
    """Scrape Mars news, featured image, facts table and hemisphere images.

    :return: dict with keys 'news_title', 'news_paragraph',
        'featured_image_url', 'facts_html', 'hemi'.
    """
    executable_path = {'executable_path': 'chromedriver'}
    browser = Browser('chrome', **executable_path, headless=False)
    # Latest news: wait briefly for the item list, then parse the first slide.
    news_url = 'https://mars.nasa.gov/news/'
    browser.visit(news_url)
    browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)
    news_html = browser.html
    soup = bs(news_html, 'lxml')
    result = soup.select_one("ul.item_list li.slide")
    news_title = result.find("div", class_="content_title").text
    news_paragraph = result.find("div", class_="article_teaser_body").text
    # Featured image: the fancybox button carries the relative image path.
    featue_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    browser.visit(featue_url)
    featue_html = browser.html
    soup = bs(featue_html, 'html.parser')
    featured_group = soup.find("article", class_="carousel_item")
    part_image_url = featured_group.find(
        'a', class_='button fancybox')['data-fancybox-href']
    featured_image_url = f"https://www.jpl.nasa.gov{part_image_url}"
    # Facts table: first table on the page, flattened to single-line HTML.
    facts_url = 'https://space-facts.com/mars/'
    tables = pd.read_html(facts_url)
    facts_df = tables[0]
    facts_df.columns = ['MARS PLANT PROFILE', 'Parameter']
    facts_html = facts_df.to_html(index=False)
    facts_html = facts_html.replace('\n', '')
    # Hemispheres: open each product item, read title and Sample image link.
    hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser.visit(hemi_url)
    hemi_html = browser.html
    soup = bs(hemi_html, 'lxml')
    results = browser.find_by_css('div.description a.product-item')
    hemisphere_image_urls = []
    for i in range(len(results)):
        # Re-query each pass: browser.back() invalidates old element handles.
        browser.find_by_css('div.description a.product-item')[i].click()
        img = browser.find_by_text("Sample").first
        img_url = img['href']
        img_title = browser.find_by_css('h2.title').text
        info = {"title": img_title, "img_url": img_url}
        hemisphere_image_urls.append(info)
        browser.back()
    web_info = {
        'news_title': news_title,
        'news_paragraph': news_paragraph,
        'featured_image_url': featured_image_url,
        'facts_html': facts_html,
        'hemi': hemisphere_image_urls
    }
    browser.quit()
    return web_info
def netflix(sentence, args):
    """Log in to Netflix from a spoken/typed command and start playback.

    Parses the show name out of *sentence* ("netflix play <show>" or
    "netflix <show>"), signs in with credentials from config + keyring,
    picks the configured profile and clicks through search to play.

    Returns a human-readable status string.
    """
    netflix_config = config.load_config("netflix")
    if netflix_config:
        username = netflix_config['username']
        password = keyring.get_password("netflix", username)
        profile_name = netflix_config["profile_name"]
        if username and password:
            #TODO: change this to zope.testbrowser once it's working in the frontend
            chrome_path = config.load_config("chrome_path")
            # NOTE(review): executable_path is built but never passed to
            # Browser() below -- confirm whether it should be.
            executable_path = {'executable_path': chrome_path}
            browser = Browser("Chrome")
            # NOTE(review): URL has a triple slash ("https:///") -- verify.
            browser.visit("https:///netflix.com/Login")
            email_field = browser.find_by_name("email")
            password_field = browser.find_by_name("password")
            sign_in_xpath = '//*[@id="appMountPoint"]/div/div[2]/div/form[1]/button'
            sign_in_button = browser.find_by_xpath(sign_in_xpath)
            email_field.fill(username)
            password_field.fill(password)
            sign_in_button.click()
            if browser.is_text_present(profile_name):
                profile_button = browser.find_by_text(profile_name)
                profile_button.click()
                #Use ent code to find out if there's a named work of art that was detected
                #search_tab_xpath = '//*[@id="hdPinTarget"]/div/div[1]/div/button'
                #search_tab_xpath = '//*[@id="hdPinTarget"]/div/div[1]/div/button/span[1]'
                search_tab_xpath = '//*[@id="hdPinTarget"]/div/div[1]/div/button'
                search_tab = browser.find_by_xpath(search_tab_xpath)
                search_tab.click()
                # Extract the show name from the command sentence.
                # NOTE(review): if "netflix" is absent from the sentence,
                # `show` is never bound and the fill() below raises.
                if "netflix" in sentence:
                    if "netflix play "in sentence:
                        show = sentence.split("netflix play ")[1]
                    else:
                        show = sentence.split("netflix ")[1]
                #search_field = browser.find_by_text("Titles, people, genres")[0]
                search_field_xpath = '//*[@id="hdPinTarget"]/div/div[1]/div/div/input'
                search_field = browser.find_by_xpath(search_field_xpath)
                search_field.fill(show)
                # Click the first search result card, then its play overlay.
                show_card_xpath = '//*[@id="title-card-undefined-0"]'
                show_card = browser.find_by_xpath(show_card_xpath)
                show_card.click()
                play_icon_xpath = '//*[@id="title-card-undefined-0"]/div[2]/a/div/div'
                play_icon = browser.find_by_xpath(play_icon_xpath)
                play_icon.click()
                # NOTE(review): this id looks title-specific (hard-coded) --
                # confirm it works for arbitrary shows.
                play_button_xpath = '//*[@id="70279852"]/div[2]/a/div/div'
                play_button = browser.find_by_xpath(play_button_xpath)
                play_button.click()
                #chromecast_button_xpath = '//*[@id="netflix-player"]/div[4]/section[2]/div[7]/div[2]/button'
                #chromecast_button = browser.find_by_xpath(chromecast_button_xpath)
                #chromecast_button.click()
                return "Done"
            else:
                return "Profile {0} could not be found on the netflix page".format(str(profile_name))
        else:
            return "Netflix username and password could not be retrieved from config and keyring"
    else:
        return "Netflix config not found"
def launch(self):
    """Drive an image gallery site: repeatedly reload, click the
    '下载' (download) link when present, otherwise advance to the
    next image via the page's jQuery switch button.

    Runs for up to 500 iterations; all failures are best-effort
    swallowed so the crawl keeps going.
    """
    # launch driver
    browser = Browser('chrome')
    browser.driver.maximize_window()
    browser.visit(self.url)
    #browser.driver.implicitly_wait(5)
    #browser.driver.Manage().Timeouts().SetPageLoadTimeout(600);
    for i in range(500):
        #timeload=browser.evaluate_script('window.performance.timing.domLoading-window.performance.timing.connectStart')
        #timeload = browser.evaluate_script('window.performance.timing.domComplete - window.performance.timing.fetchStart')
        #timeload = browser.evaluate_script('window.performance.timing')
        #print(timeload)
        try:
            # if(timeload>1000):
            # browser.evaluate_script('$(".img-switch-btn").last().click()')
            browser.reload()
            browser.driver.set_page_load_timeout(10)
            browser.driver.set_script_timeout(15)
            # '下载' = "download"; click it if the link rendered.
            if browser.is_element_present_by_text('下载'):
                browser.find_by_text('下载').first.click()
            else:
                # Otherwise advance to the next image.
                browser.evaluate_script(
                    '$(".img-switch-btn").last().click()')
        except:
            # Best-effort: on any failure just try to advance instead.
            try:
                browser.evaluate_script(
                    '$(".img-switch-btn").last().click()')
            except:
                traceback.print_exc()
                pass
        #browser.evaluate_script('$(".img-switch-btn").last().click()')
        time.sleep(1.5)
        # A download may have opened a new window; close extras and
        # move on to the next image either way.
        if (len(browser.windows) > 1):
            browser.windows.current = browser.windows[0]
            browser.windows.current.close_others()
            # If the download did not succeed, go to the next image.
            browser.evaluate_script('$(".img-switch-btn").last().click()')
        else:
            browser.evaluate_script('$(".img-switch-btn").last().click()')
def scrape_mars():
    """Scrape Mars news, featured image, fact table and hemispheres.

    Returns:
        dict with keys news_title, news_p, featured_image_url,
        facts_table and hemispheres.

    Fixes over the original: the summary dict referenced
    `featured_image_url` while the variable was named
    `featured_img_url` (NameError), and the function never returned
    the data it collected.
    """
    # Set up Splinter
    executable_path = {'executable_path': ChromeDriverManager().install()}
    browser = Browser('chrome', **executable_path, headless=True)

    url = "https://mars.nasa.gov/news/"
    # Retrieve page with the requests module
    response = requests.get(url)
    # Create BeautifulSoup object; parse with 'html.parser'
    soup = BeautifulSoup(response.text, 'html.parser')
    news_title = soup.find("div", class_="content_title").text.strip()
    news_p = soup.find("div", class_="rollover_description_inner").text.strip()

    # Featured image: the header image on the JPL class mirror site.
    url = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"
    browser.visit(url)
    html = browser.html
    img_soup = BeautifulSoup(html, 'html.parser')
    urlb = img_soup.find('img', class_="headerimage")['src']
    featured_image_url = f"https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/{urlb}"

    # Mars fact table rendered back to HTML via pandas.
    url = "https://space-facts.com/mars/"
    facts_table = pd.read_html(url)[0].to_html()

    # Hemisphere titles come from the <h3> banners on the results page.
    url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&vs=Mars"
    browser.visit(url)
    html = browser.html
    hemi_soup = BeautifulSoup(html, 'html.parser')
    banners_tag = hemi_soup.find_all('h3')
    banners = [x.text for x in banners_tag]

    hemispheres = []
    for i in range(len(banners)):
        hemisphere = {}
        # Revisit the results page so the i-th <h3> is clickable again.
        browser.visit(url)
        browser.find_by_css('h3')[i].click()
        hemisphere["title"] = [banners[i]]
        hemisphere["img_url"] = browser.find_by_text('Sample')['href']
        hemispheres.append(hemisphere)
        browser.back()

    # Store data in a dictionary
    mars_data = {
        "news_title": news_title,
        "news_p": news_p,
        "featured_image_url": featured_image_url,
        "facts_table": facts_table,
        "hemispheres": hemispheres
    }

    # Quit the browser after scraping
    browser.quit()
    return mars_data
def filloutleads(contacturl, appurl): '''Fills out lead forms specified in geturls()''' #TODO: see if we can get addendumDict inside appDict. formDict = dictBuilder(contacturl) appDict = dictBuilder(appurl) addendumDict = { 'ctlTypedSignature': 'test', 'ctlDateSigned': today.strftime('%m/%d/%Y') } browser = Browser() browser.visit(contacturl) browser.fill_form(formDict) browser.find_by_text('Send').click() browser.visit(appurl) try: browser.fill_form(appDict) except: # print('Failed to fill out credit app') pass page = requests.get(appurl) soup = BeautifulSoup(page.text, 'lxml') for menu in soup.findAll('select'): if menu.has_attr('required'): if 'CoApplicant' in str(menu.get('id')): pass elif 'State' in str(menu.get('id')): browser.select(menu.get('name'), 'AL') elif 'Years' in str(menu.get('id')): browser.select(menu.get('name'), '1') elif 'Month' in str(menu.get('id')): browser.select(menu.get('name'), '1') browser.fill_form(addendumDict) browser.find_by_xpath( '''/html/body/section/form/div/fieldset[7]/div[1]/div[4]/div/div/div/label/span''' ).click() browser.find_by_text('Submit Your Application').click() browser.quit()
def marsFeaturedImageURL():
    """Collect Mars "wallpaper" image URLs from the JPL space-images page.

    Clicks the 'MORE' button up to 10 times to load additional
    thumbnails, then prefixes each relative path with the photojournal
    base URL.

    Fixes over the original: the loop re-parsed the same initial soup
    ten times (duplicating every entry) and never actually clicked
    'MORE', so no new images could load; local names shadowed the
    builtins `list` and `str`; the `prepend` helper is replaced by a
    comprehension.

    Returns:
        list of absolute JPEG URLs.
    """
    # Local import preserved from the original script layout.
    from splinter import Browser

    executable_path = {"executable_path": "chromedriver"}
    browser = Browser("chrome", **executable_path, headless=False)
    imageurl = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    browser.visit(imageurl)

    img_list = []
    for page in range(10):
        # Re-parse each pass so newly loaded thumbnails are picked up.
        soup = bs(browser.html, 'html.parser')
        for image in soup.find_all('div', class_="img"):
            src = image.find('img').get('src')
            # Only the full-size "wallpaper" renditions are wanted;
            # skip duplicates from earlier passes.
            if src and 'wallpaper' in src and src not in img_list:
                img_list.append(src)
        try:
            # Load the next batch; raises when no button remains.
            browser.find_by_text('MORE').first.click()
        except Exception:
            print(f'{page} Pages Loaded')
            break

    urlbuilder = 'https://photojournal.jpl.nasa.gov/jpeg'
    return [urlbuilder + src for src in img_list]
def scrape():
    """Scrape the latest NASA Mars headline/teaser and the JPL featured
    image URL.

    Relies on module-level `init_browser` and `executable_path`.

    Fix over the original: `browser = init_browser` assigned the
    function object itself instead of calling it, so the first
    `browser.visit(...)` could never work.
    """
    browser = init_browser()

    ### NASA MARS NEWS
    news_url = 'https://mars.nasa.gov/news'
    browser.visit(news_url)
    html = browser.html
    news_soup = bs(html, 'html.parser')

    # Parent element holding both the headline and the teaser paragraph.
    news_title_paragraph = news_soup.find('div', class_="list_text")
    # print(news_title_paragraph)
    news_title = news_title_paragraph.find('div', class_="content_title").text
    news_para = news_title_paragraph.find(
        'div', class_="article_teaser_body").text

    ### JPL MARS SPACE IMAGES - FEATURED IMAGE
    browser = Browser('chrome', **executable_path, headless=False)
    jpl_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    browser.visit(jpl_url)
    # Dismiss the footer overlay, then open the "more info" detail page.
    browser.find_by_tag('footer').click()
    browser.find_by_text('more info ').click()

    # Get/print featured image url information
    html = browser.html
    jpl_soup = bs(html, 'html.parser')
    featured_image_info = jpl_soup.find('figure', class_="lede").a.img
    # Create Featured Image URL
    featured_image_url = f'https://www.jpl.nasa.gov' + featured_image_info[
        "src"]
    # NOTE(review): nothing is returned; callers presumably want
    # news_title, news_para and featured_image_url -- confirm intent.
def test_Feedb(self):
    """Smoke-test the Feedback form of the QA site.

    Logs in, opens the Feedback page, fills name/email if the form
    ('opinion' text) appears, and writes a timestamped PASSED/FAILED
    line to fback.txt.

    Fixes over the original: the result file was opened without a
    context manager (handle leak on error) and the write sequence was
    duplicated across both branches.
    """
    browser = Browser('chrome')
    url = "http://qlv5-fe-qa.azurewebsites.net"
    browser.visit(url)
    browser.find_by_name('username').fill(self.param.username)
    browser.find_by_name('password').fill(self.param.password)
    browser.find_by_text('Login').first.click()
    # Allow time for the post-login dashboard to render.
    browser.is_text_present('Dashboard', wait_time=15)
    browser.wait_time = 30
    browser.find_by_text('Feedback').first.click()
    if browser.is_text_present('opinion', wait_time=10):
        browser.find_by_id('nameInput').fill(self.param.nameinput)
        browser.find_by_id('emailInput').fill(self.param.emailinput)
        outcome = " Feedback Form PASSED. \n"
    else:
        outcome = " Feedback Form FAILED. \n"
    # Context manager guarantees the file handle is closed.
    with open("fback.txt", "w") as f:
        f.write(time.strftime("%c"))
        f.write(outcome)
    # NOTE(review): the browser is never quit here -- confirm whether a
    # test teardown elsewhere handles it.
def passwd(self):
    """Change the Twitter account password to the panic password.

    Signs in with self.user / self.login, then submits the
    password-change form with self.panic as the new password.
    """
    browser = Browser()
    browser.driver.set_window_size(900, 900)

    # Sign in from the landing page.
    browser.visit("https://twitter.com")
    browser.find_by_css(".js-login").click()
    browser.find_by_id("signin-email").fill(self.user)
    browser.find_by_id("signin-password").fill(self.login)
    browser.find_by_value("Log in").click()

    # Submit the password-change form on the settings page.
    browser.visit("https://twitter.com/settings/password")
    browser.fill("current_password", self.login)
    browser.fill("user_password", self.panic)
    browser.fill("user_password_confirmation", self.panic)
    browser.find_by_text("Save changes").click()

    browser.quit()
def passwd(self):
    """Change the Twitter account password to the panic password.

    Signs in with self.user / self.login and submits the
    password-change form with self.panic. Any UI failure is swallowed
    (best-effort, matching the original) but the browser is always
    quit.

    Fix over the original: the bare `except:` (which also swallowed
    KeyboardInterrupt/SystemExit) is narrowed to `Exception`, and the
    duplicated `b.quit()` calls are unified into a `finally` block.
    """
    b = Browser()
    b.driver.set_window_size(900, 900)
    try:
        b.visit("https://twitter.com")
        btn = b.find_by_css(".js-login")
        btn.click()
        b.find_by_name("session[username_or_email]").fill(self.user)
        b.find_by_name("session[password]").fill(self.login)
        btn = b.find_by_value("Log in")
        btn.click()
        b.visit("https://twitter.com/settings/password")
        b.fill("current_password", self.login)
        b.fill("user_password", self.panic)
        b.fill("user_password_confirmation", self.panic)
        btn = b.find_by_text("Save changes")
        btn.click()
    except Exception:
        # Best-effort: a failed step must not stop the cleanup below.
        pass
    finally:
        b.quit()
def run(search_term):
    """Search the UO "find people" directory for *search_term* and print
    a rough classification of the result page (Python 2 code).

    Result extraction is still TODO; on a person-list page it recurses
    on itself with the same term.
    """
    first_name = search_term
    browser = Browser()
    #browser = Browser('zope.testbrowser', ignore_robots=True)
    browser.visit('http://uoregon.edu/findpeople')
    browser.fill('name', search_term)
    browser.find_by_name('op').first.click()
    # NOTE(review): (len(a) > len(b)) is a bool, so comparing it to 6
    # can never be True -- this branch is dead; confirm intended test.
    if (len(browser.url) > len('http://uoregon.edu/findpeople/person/'+first_name+'/')) == 6:
        print "Found"
        #return results
        #FIXME: Add results grab
    # NOTE(review): find_by_text returns an element list, never True, so
    # this comparison is always False -- probably meant bool(...) or
    # is_text_present.
    if browser.find_by_text('No results were found.') == True:
        print "Not Found"
        #return None
    # NOTE(review): these look like CSS class names -- confirm whether
    # '.findpeople-list' / '.findpeople-person' selectors were intended.
    if browser.is_element_present_by_css('findpeople-list') == True:
        print "Found Many"
        #search options including middle initial
        #if multiple results:
        #search options including level
    elif browser.is_element_present_by_css('findpeople-person') == True:
        run(first_name)
    else:
        print "Not Found2"
# 12306 (Chinese railway) ticket-booking helper script (Python 2).
# For each day in the requested range it pre-sets the travel-date and
# station cookies, reloads the left-ticket page, queries, and clicks
# the booking button.
import datetime

data1 = raw_input("Write your Data(yyyy-mm-dd):")
data = time.strptime(data1, "%Y-%m-%d")
datimeStarp = int(time.mktime(data))  # start date as a unix timestamp
days = raw_input("Write your Days:")
number = raw_input("Write your number:")  # NOTE(review): read but never used
url = "https://kyfw.12306.cn/otn/leftTicket/init"
days = int(days)
time.sleep(5)
b = Browser()
for i in range(days):
    b.visit(url)
    time.sleep(10)
    # Compute the (i+1)-th day after the chosen start date.
    date1 = datetime.datetime.utcfromtimestamp(datimeStarp)
    dates = date1 + datetime.timedelta(days=i + 1)
    datas = dates.strftime("%Y-%m-%d")
    # Station values are the site's own URL-encoded escapes
    # (from/to station codes end in BXP / KQW).
    b.cookies.add({"_jc_save_fromDate": "%s" % (datas)})
    b.cookies.add({"_jc_save_fromStation": "%u5317%u4EAC%u897F%2CBXP"})
    b.cookies.add({"_jc_save_toStation": "%u8D35%u9633%u5317%2CKQW"})
    time.sleep(10)
    b.reload()
    b.find_by_id("query_ticket").click()
    time.sleep(10)
    # '预订' is the "book/reserve" button on each result row.
    b.find_by_text("预订").first.click()
    time.sleep(40)
    b.cookies.delete()
class Compass:
    """Automates the Scouts "Compass" membership site with a splinter
    browser: login, member lookup, training/permit table extraction and
    per-section CSV export; exported files can then be loaded with
    load_from_dir() for offline queries via pandas.

    Fixes over the original: __init__ initialised `self._record` while
    every reader uses `self._records`; member_training_record returned
    personal_learning_plans twice and dropped training_record; export()
    had a ".fomat" typo that raised AttributeError on the timeout path;
    deprecated log.warn calls replaced with log.warning for consistency.
    """

    def __init__(self, username='', password='', outdir=''):
        self._username = username
        self._password = password
        self._outdir = outdir  # directory the browser downloads CSVs into
        self._browser = None
        self._records = None  # fixed: was `self._record`

    def quit(self):
        """Shut down the browser if one is open."""
        if self._browser:
            self._browser.quit()
            self._browser = None

    def loggin(self):
        """Log in to Compass and select the Group Admin role."""
        prefs = {
            "browser.download.folderList": 2,
            "browser.download.manager.showWhenStarting": False,
            "browser.download.dir": self._outdir,
            "browser.helperApps.neverAsk.saveToDisk": "application/octet-stream,application/msexcel,application/csv"}
        self._browser = Browser('chrome')  # , profile_preferences=prefs)
        self._browser.visit('https://compass.scouts.org.uk/login/User/Login')
        self._browser.fill('EM', self._username)
        self._browser.fill('PW', self._password)
        time.sleep(1)
        self._browser.find_by_text('Log in').first.click()
        # Look for the Role selection menu and select my Group Admin role.
        # NOTE(review): '1253644' is an account-specific role id.
        self._browser.is_element_present_by_name(
            'ctl00$UserTitleMenu$cboUCRoles', wait_time=30)
        self._browser.select('ctl00$UserTitleMenu$cboUCRoles', '1253644')
        time.sleep(1)

    def wait_then_click_xpath(self, xpath, wait_time=30, frame=None):
        """Poll until *xpath* is present (in *frame*, default the main
        window), then click it. Retries indefinitely on timeout."""
        frame = self._browser if frame is None else frame
        while True:
            try:
                if frame.is_element_present_by_xpath(xpath, wait_time=wait_time):
                    frame.find_by_xpath(xpath).click()
                    break
                else:
                    log.warning("Timeout expired waiting for {}".format(xpath))
                    time.sleep(1)
            except:
                log.warning("Caught exception: ", exc_info=True)

    def wait_then_click_text(self, text, wait_time=30, frame=None):
        """Poll until link *text* is present, then click it."""
        frame = self._browser if frame is None else frame
        while True:
            if frame.is_text_present(text, wait_time=wait_time):
                frame.click_link_by_text(text)
                break
            else:
                log.warning("Timeout expired waiting for {}".format(text))

    def adult_training(self):
        """Navigate to the Adult Training search page and show all records."""
        self.home()
        self.wait_then_click_text('Training')
        time.sleep(1)
        self.wait_then_click_text('Adult Training')
        time.sleep(1)
        self.wait_then_click_xpath('//*[@id="bn_p1_search"]')

    def home(self):
        """Click the logo to take us to the top."""
        self.wait_then_click_xpath('//*[@alt="Compass Logo"]')
        time.sleep(1)

    def search(self):
        """Open the "Find Member(s)" search and show all group members."""
        self.home()
        # Click search button
        self.wait_then_click_xpath('//*[@id="mn_SB"]')
        time.sleep(1)
        # Click "Find Member(s)"
        self.wait_then_click_xpath('//*[@id="mn_MS"]')
        time.sleep(1)
        # The member search form lives inside a popup iframe.
        with self._browser.get_iframe('popup_iframe') as i:
            self.wait_then_click_xpath('//*[@id="LBTN2"]', frame=i)
            time.sleep(1)
            self.wait_then_click_xpath(
                '//*[@class="popup_footer_right_div"]/a', frame=i)
            time.sleep(1)

    def lookup_member(self, member_number):
        """Quick-search for a member by membership number."""
        self.home()
        # Click search button
        self.wait_then_click_xpath('//*[@id="mn_SB"]')
        time.sleep(1)
        xpath = '//*[@id="CNLookup2"]'
        while True:
            try:
                if self._browser.is_element_present_by_xpath(xpath, wait_time=30):
                    self._browser.find_by_xpath(xpath).fill(member_number)
                    break
                else:
                    log.warning("Timeout expired waiting for {}".format(xpath))
                    time.sleep(1)
            except:
                log.warning("Caught exception: ", exc_info=True)
        self.wait_then_click_xpath('//*[@id="mn_QS"]')

    def fetch_table(self, table_id):
        """Scrape the HTML table *table_id* from the current page into a
        DataFrame; returns None if the table is not present."""
        parser = etree.HTMLParser()

        def columns(row):
            return ["".join(_.itertext()) for _ in etree.parse(
                StringIO(row.html), parser).findall('/*/td')]

        def headers(row):
            return ["".join(_.itertext()) for _ in etree.parse(
                StringIO(row.html), parser).findall('/*/td')]

        headers_xpath = '//*[@id ="{}"]/thead/*'.format(table_id)
        table_xpath = '//*[@id ="{}"]/tbody/tr[not(@style="display: none;")]'.format(table_id)
        if self._browser.is_element_present_by_xpath(table_xpath, wait_time=5):
            headings = [headers(row) for row in
                        self._browser.find_by_xpath(headers_xpath)][0]
            records = [columns(row) for row in
                       self._browser.find_by_xpath(table_xpath)]
            # Extend the length of each row to the same length as the columns
            records = [row + ([None] * (len(headings) - len(row)))
                       for row in records]
            # And add dummy columns if we do not have enough headings
            headings = headings + ["dummy{}".format(_) for _ in range(
                0, len(records[0]) - len(headings))]
            return pd.DataFrame.from_records(records, columns=headings)
        log.warning("Failed to find table {}".format(table_id))
        return None

    def member_training_record(self, member_number, member_name):
        """Return (personal_learning_plans, training_record,
        mandatory_learning) DataFrames for one member, each tagged with
        the member number and name."""
        self.lookup_member(member_number)
        # Select Training record
        self.wait_then_click_xpath('//*[@id="LBTN5"]')
        personal_learning_plans = self.fetch_table('tbl_p5_TrainModules')
        personal_learning_plans['member'] = member_number
        personal_learning_plans['name'] = member_name
        training_record = self.fetch_table('tbl_p5_AllTrainModules')
        training_record['member'] = member_number
        training_record['name'] = member_name
        mandatory_learning = self.fetch_table('tbl_p5_TrainOGL')
        mandatory_learning['member'] = member_number
        mandatory_learning['name'] = member_name
        # Fixed: previously returned personal_learning_plans twice and
        # silently dropped training_record.
        return personal_learning_plans, training_record, mandatory_learning

    def member_permits(self, member_number, member_name):
        """Return the member's permits table (or None), tagged with the
        member number and name."""
        self.lookup_member(member_number)
        # Select Permits
        self.wait_then_click_xpath('//*[@id="LBTN4"]')
        permits = self.fetch_table('tbl_p4_permits')
        if permits is not None:
            permits['member'] = member_number
            permits['name'] = member_name
        return permits

    # NOTE(review): lru_cache on instance methods keeps `self` alive for
    # the cache's lifetime; acceptable here since one Compass is used.
    @lru_cache()
    def get_all_adult_trainers(self):
        """Cached table of all adult training search results."""
        self.adult_training()
        return self.fetch_table('tbl_p1_results')

    @lru_cache()
    def get_all_group_members(self):
        """Cached member-search table for the whole group."""
        self.search()
        self._browser.is_element_present_by_xpath(
            '//*[@id = "MemberSearch"]/tbody', wait_time=10)
        time.sleep(1)
        # Hack to ensure that all of the search results loaded.
        for i in range(0, 5):
            self._browser.execute_script(
                'document.getElementById("ctl00_main_working_panel_scrollarea").scrollTop = 100000')
            time.sleep(1)
        return self.fetch_table('MemberSearch')

    def export(self, section):
        """Export one section's member list as <section>.csv in outdir."""
        # Select the My Scouting link.
        self._browser.is_text_present('My Scouting', wait_time=30)
        self._browser.click_link_by_text('My Scouting')
        # Click the "Group Sections" hotspot.
        self.wait_then_click_xpath('//*[@id="TR_HIER7"]/h2')
        # Click the link that shows the number of members in the section.
        # This is the one bit that is section specific.
        # We might be able to match on the Section name in the list,
        # which would make it more robust but at present we just hard
        # the location in the list.
        section_map = {
            'garrick': 2,
            'paget': 3,
            'swinfen': 4,
            'brown': 4,
            'maclean': 5,
            'rowallan': 6,
            'somers': 7,
            'boswell': 8,
            'erasmus': 9,
            'johnson': 10
        }
        self.wait_then_click_xpath(
            '//*[@id="TR_HIER7_TBL"]/tbody/tr[{}]/td[4]/a'.format(
                section_map[section.lower()]
            ))
        # Click on the Export button.
        self.wait_then_click_xpath('//*[@id="bnExport"]')
        # Click to say that we want a CSV output.
        self.wait_then_click_xpath(
            '//*[@id="tbl_hdv"]/div/table/tbody/tr[2]/td[2]/input')
        time.sleep(2)
        # Click to say that we want all fields.
        self.wait_then_click_xpath('//*[@id="bnOK"]')

        download_path = os.path.join(self._outdir, 'CompassExport.csv')
        if os.path.exists(download_path):
            log.warning("Removing stale download file.")
            os.remove(download_path)

        # Click the warning.
        self.wait_then_click_xpath('//*[@id="bnAlertOK"]')

        # Browser will now download the csv file into outdir. It will be
        # called CompassExport. Wait for file.
        timeout = 30
        while not os.path.exists(download_path):
            time.sleep(1)
            timeout -= 1
            if timeout <= 0:
                # Fixed: ".fomat" typo raised AttributeError here.
                log.warning("Timeout waiting for {} export to download.".format(
                    section
                ))
                break

        # rename download file.
        os.rename(download_path,
                  os.path.join(self._outdir, '{}.csv'.format(section)))
        log.info("Completed download for {}.".format(section))
        # Draw breath
        time.sleep(1)

    def load_from_dir(self):
        """Load the records from the set of CSV files in self._outdir."""
        log.debug('Loading from {}'.format(self._outdir))

        def get_section(path, section):
            df = pd.read_csv(path, dtype=object, sep=',')
            df['section'] = section
            # Pre-computed lowercase name columns for case-insensitive
            # matching in find_by_name().
            df['forenames_l'] = [_.lower().strip() for _ in df['forenames']]
            df['surname_l'] = [_.lower().strip() for _ in df['surname']]
            return df

        self._records = pd.DataFrame().append(
            [get_section(os.path.join(self._outdir, section),
                         os.path.splitext(section)[0])
             for section in os.listdir(self._outdir)],
            ignore_index=True)

    def find_by_name(self, firstname, lastname, section_wanted=None,
                     ignore_second_name=True):
        """Return list of matching records."""
        recs = self._records
        if ignore_second_name:
            # Match on the first initial only.
            # NOTE(review): strip(' ')[0] takes the first character of the
            # stripped string -- confirm split(' ')[0] was not intended.
            df = recs[
                (recs.forenames_l.str.lower().str.match(
                    '^{}.*$'.format(firstname.strip(' ')[0].lower().strip()))) &
                (recs.surname_l == lastname.lower().strip())]
        else:
            df = recs[(recs.forenames_l == firstname.lower().strip()) &
                      (recs.surname_l == lastname.lower().strip())]
        if section_wanted is not None:
            df = df[(df['section'] == section_wanted)]
        return df

    def sections(self):
        "Return a list of the sections for which we have data."
        return self._records['section'].unique()

    def all_yp_members_dict(self):
        """Map section name -> DataFrame of that section's records."""
        return {s: members for s, members in self._records.groupby('section')}

    def section_all_members(self, section):
        """All records (rows) for one section."""
        return [m for i, m in self._records[
            self._records['section'] == section].iterrows()]

    def section_yp_members_without_leaders(self, section):
        """Young-person rows only (Beaver/Cub/Scout roles) for a section."""
        return [m for i, m in self._records[
            (self._records['section'] == section) &
            (self._records['role'].isin(
                ['Beaver Scout', 'Cub Scout', 'Scout']))].iterrows()]

    def members_with_multiple_membership_numbers(self):
        """Groups of records whose (forenames, surname) pair appears with
        more than one membership number."""
        return [member for s, member in self._records.groupby(
            ['forenames', 'surname']).filter(
                lambda x: len(x['membership_number'].unique()) > 1).groupby(
                    ['forenames', 'surname', 'membership_number'])]
def getCourses(email, password):
    """Log in to Coursera (Python 2 code) and let the user pick a course
    from their dashboard.

    Tries the PhantomJS webdriver first, then Chrome, then Firefox.
    Returns the full display name of the chosen course.
    """
    #--------------------------------
    # getLessons(email, password, fullCourseName)
    #
    waiting_time = 0
    arrLessonURL = []    # NOTE(review): never used in this function
    arrLessonTitle = []  # NOTE(review): never used in this function
    print "Getting courses list..."
    firstBrowser = 'phantomjs'
    secondBrowser = 'chrome'
    try:
        browser = Browser(firstBrowser)
    except:
        print "\nYou have not properly installed or configured PhantomJS!\nYou will see an automated browser popping up and crawling,\nwhich you will not see if you have properly installed or configured PhantomJS.\nDo not close that automated browser...\n"
        try:
            input("Press any key to continue...\n")
        except:
            pass
        try:
            browser = Browser(secondBrowser)
            print "Using Chrome Web Driver...\n"
        except:
            browser = Browser()
            print "Using Firefox Web Driver...\n"
    browser.driver.maximize_window()
    browser.visit('https://www.coursera.org/?authMode=login')
    browser.fill('email', email)
    browser.fill('password', password)
    button = browser.find_by_text('Log In')[-1]
    button.click()
    print "Welcome to Coursera!\n"
    time.sleep(15)
    # Busy-wait (1s ticks) until the dashboard has rendered.
    while ('My Courses' not in browser.html):
        waiting_time = waiting_time +1
        sys.stdout.write('loading courses page...' + str(waiting_time) + " seconds\r")
        time.sleep(1)
    selector = 'div.headline-1-text.c-dashboard-course-course-name'
    courses = browser.find_by_css(selector)
    print "There are " + str(len(courses)) + " courses available\n"
    for i in range(0, len(courses)):
        print "["+str(i+1)+"] " + courses[i].text.encode('utf-8')
    print "\n"
    # Prompt until the user types a parseable course number.
    while True:
        sys.stdout.write("[ ] Please pick course number!\r")
        pick = raw_input("[")[:2]
        sys.stdout.write("[ ] second message!\r")
        try:
            pick = int(pick)
            break
        except:
            continue
    fullCourseName = courses[pick-1].text.encode('utf-8')
    print "\nYou have chosen: ["+str(pick)+"] " + fullCourseName + "\n"
    return fullCourseName
    # NOTE(review): everything below is unreachable after the return.
    try:
        print "\nClosing connection..."
        time.sleep(15)
        browser.driver.close()
    except:
        print footerText
class MarketWatch(object):
    """Automates the MarketWatch virtual-trading game with a PhantomJS
    browser: login, balance/price/market-cap lookups and simulated
    trades.

    Fixes over the original: get_balance's retry discarded its result
    (returned None); trade()'s confirmation loop never re-queried the
    page (infinite loop if the first submit failed); trade() had a
    mutable default argument; credentials read with readline() carried
    trailing newlines into the form fills; bare excepts narrowed to
    Exception.
    """

    def __init__(self):
        self.browser = Browser('phantomjs')

    def read_loginfo(self):
        """Read [email, password] from the `.loginfo` file (one per line)."""
        with open('.loginfo') as loginfo:
            # rstrip the newline readline() leaves on each value so it is
            # not typed into the login form.
            email = loginfo.readline().rstrip('\n')
            password = loginfo.readline().rstrip('\n')
        return [email, password]

    def login(self):
        """Log in to MarketWatch; always returns True."""
        login_url = "https://id.marketwatch.com/access/50eb2d087826a77e5d000001/latest/login_standalone.html"
        self.browser.visit(login_url)
        # Read the credentials once instead of re-opening the file twice.
        credentials = self.read_loginfo()
        self.browser.fill('username', credentials[0])
        self.browser.fill('password', credentials[1])
        login_button = self.browser.find_by_id('submitButton')
        login_button.click()
        time.sleep(2)
        return True

    def get_balance(self):
        """Return the current cash balance as a float, retrying after a
        fresh login if scraping fails."""
        try:
            self.login()
            self.browser.visit(gameRoot + "/portfolio")
            portfolio_page = BeautifulSoup(self.browser.html, "html.parser")
            balance = float(str(portfolio_page.find(
                "span", {"class": "data"}).text).lstrip("$").replace(',', ''))
            return balance
        except Exception:
            # Fixed: the retry result was previously discarded.
            # NOTE(review): still recurses unboundedly if the page never
            # parses -- consider a retry cap.
            self.login()
            return self.get_balance()

    def get_price(self, symbol):
        """Return the last trade price for *symbol*, or "N/A"."""
        self.browser.visit("http://www.marketwatch.com/investing/stock/" + str(symbol))
        symbol = symbol.upper()
        results = BeautifulSoup(self.browser.html, "html.parser")
        try:
            price = float(results.find("p", {'class': 'data bgLast'}).text)
        except Exception:
            price = "N/A"
        return price

    def get_marketcap(self, symbol):
        """Return the market cap (in billions) for *symbol*, or "N/A"."""
        self.browser.visit("http://www.marketwatch.com/investing/stock/" + str(symbol))
        symbol = symbol.upper()
        results = BeautifulSoup(self.browser.html, "html.parser")
        try:
            marketcap = float(str(results.find(
                "div", {'class': 'section heavytop'}).find(
                    "p", {'class': 'data lastcolumn'}).text).lstrip("$").rstrip("B"))
        except Exception:
            marketcap = "N/A"
        return marketcap

    def trade(self, symbol, position, shares, order_type=None):
        """Place a simulated trade.

        position: "long" or "short".
        order_type: ["market"], ["limit", price] or ["stop", price].
        Returns True on success, False on bad arguments.
        """
        # Fixed: was a mutable default argument (order_type=["market"]).
        if order_type is None:
            order_type = ["market"]
        symbol = symbol.upper()
        shares = int(shares)
        # try:
        # Get stock
        price = self.get_price(symbol)
        trade_button = self.browser.find_by_css("button.trade")
        trade_button.click()
        time.sleep(1)
        # Determine position
        if (position == "long"):
            long_button = self.browser.find_by_text("Buy")
            long_button.click()
        elif (position == "short"):
            short_button = self.browser.find_by_text("Sell Short")
            short_button.click()
        else:
            print("Error, position not specified. Must be 'long' or 'short'")
            return False
        # TODO order types should be dicts
        # Determine order_type
        if (order_type[0] == "market"):
            market_order = self.browser.find_by_text("Market")
            market_order.click()
        elif (order_type[0] == "limit"):
            limit_order = self.browser.find_by_css("span.option")
            limit_order.click()
            time.sleep(1)
            limit_amount = self.browser.find_by_css("input.monetary")
            limit_amount.fill(str(order_type[1]))
        elif (order_type[0] == "stop"):
            stop_order = self.browser.find_by_css("span.option")[1]
            stop_order.click()
            time.sleep(1)
            stop_amount = self.browser.find_by_css("input.monetary")[1]
            stop_amount.fill(str(order_type[1]))
        else:
            print("Error, order not specified. Must be 'market', 'limit', or 'stop'")
            return False
        # Count shares
        share_slider = self.browser.find_by_value("0")
        share_slider.fill(shares)
        time.sleep(1)
        # Execute Order
        submit_order = self.browser.find_by_text("Submit Order")
        submit_order.click()
        verify_trade = self.browser.find_by_text("Your order was submitted successfully")
        # Re-submit until the confirmation banner appears.
        while len(verify_trade) <= 0:
            submit_order = self.browser.find_by_text("Submit Order")
            submit_order.click()
            time.sleep(1)
            # Fixed: the confirmation was never re-queried, so a failed
            # first submit looped forever.
            verify_trade = self.browser.find_by_text("Your order was submitted successfully")
        time.sleep(2)
        print("Ordered", shares, "shares of", symbol, position, "@", price, end=" ")
        if len(order_type) > 1:
            print(order_type[0], ":", order_type[1])
        else:
            print("\n")
        return True
class ArloVideoDownloader:
    """Downloads yesterday's Arlo camera recordings through a scripted
    Chrome session (Python 2 code).

    Relies on the module-level constants cWaitingTimeForPageUpdate and
    cWaitingTimeForDownloadingToComplete.
    """

    def __init__(self, verbose):
        from splinter import Browser
        self.browser = Browser("chrome")
        self.verbose = verbose  # when True, the browser is left open

    def __del__(self):
        if self.verbose:
            return
            # Leave the browser open if this is running with Verbose option.
        if self.browser != None:
            self.browser.quit()

    def Login(self, account, password):
        """Log in; returns True once the Library tab is visible."""
        self.browser.visit("https://arlo.netgear.com/#/login")
        self.browser.fill('userId', account)
        self.browser.fill('password', password)
        button = self.browser.find_by_id('loginButton')
        if button.is_empty():
            self.Debug("Cannot find loginButton.")
            return False
        button.click()
        self.WaitForPageUpdate()
        # Wait for page to load. This can take some time.
        if self.browser.is_element_not_present_by_text('Library', wait_time = cWaitingTimeForPageUpdate):
            return False
        else:
            return True

    def DownloadTodaysVideo(self):
        """Open yesterday's calendar page and download every recording."""
        print "Logging in.."
        if not self.OpenYesterdayPage():
            self.Debug("Err> Cannot open library tab")
            return False
        print "Downloading Video.."
        self.IterateToDownloadAll()
        self.WaitForDownloading()

    def WaitForPageUpdate(self):
        # Fixed-length sleep in place of a real readiness check.
        self.Debug("Wait %d seconds.." % cWaitingTimeForPageUpdate)
        time.sleep(cWaitingTimeForPageUpdate)

    def IterateToDownloadAll(self):
        self.SetSelectVideoMode()
        previews = self.browser.find_by_css('.timeline-record')
        # Go over for each video.
        # I didn't try to download all at once, because I couldn't
        # avoid the problem that Browser asking for a permission
        # to download multiple files at once.
        # So, download videos one by one
        previousButton = None
        for button in previews:
            if previousButton is not None:
                # Unselect last one.
                previousButton.click()
            # Select new one
            button.click()
            previousButton = button
            self.WaitForPageUpdate()
            self.PushDownload()

    def OpenYesterdayPage(self):
        """Visit yesterday's calendar page; True if the 'Select' control
        (i.e. the library view) is present afterwards."""
        #https://arlo.netgear.com/#/calendar/201512/all/all/20151226/day
        # They have changed it! 2015/12/29
        #https://arlo.netgear.com/#/calendar/201512/20151228/day
        yesterday = self.GetYesterday()
        # NOTE(review): "%d%d" does not zero-pad month/day, so January
        # renders as e.g. "20161" rather than "201601" -- confirm the
        # site accepts this.
        url = "https://arlo.netgear.com/#/calendar/%d%d/%d%d%d/day" % (
            yesterday.year, yesterday.month,
            yesterday.year, yesterday.month, yesterday.day)
        self.Debug("Visiting: %s" % url)
        # This breaks session! What should I do?
        self.browser.visit(url)
        self.WaitForPageUpdate()
        return not self.browser.is_element_not_present_by_text('Select')

    def SetSelectVideoMode(self):
        self.browser.find_by_text('Select').click()

    def GetYesterday(self):
        return datetime.datetime.now() - datetime.timedelta(hours=24)

    def PushDownload(self):
        # TODO: Can we change the download folder?
        buttons = self.browser.find_by_css('.download')
        buttons[0].click()
        pass

    def WaitForDownloading(self):
        # TODO: How can I know when all the downloading would be completed?
        time.sleep(cWaitingTimeForDownloadingToComplete)

    def Debug(self, message):
        if self.verbose:
            print message
# python ->for_JD #coding=UTF-8 from splinter import Browser import time url = 'http://club.jd.com/myJdcomments/myJdcomments.action' brow = Browser() brow.visit(url) time.sleep(10) num = raw_input('Numbers:') time.sleep(5) for i in range(int(num)): brow.find_by_text('点击评价').click() #brow.find_by_text(u'商品是否给力').click() #brow.fill('感觉还不错') #brow.find_by_text("评分").mouse_over() time.sleep(30) brow.find_by_text('发表评价').click() time.sleep(5)
def line_login(browser, user_name, password, code): """ lineに自動ログインして、パラメータのカードコードを入力し、チャージする。 チャージした結果を返す。 :param browser:ブラウザインスタンス :param user_name:ログインユーザネーム :param password:ログインパスワード :param code:ギフトカードコード :return:チャージ結果 """ # ログインページを開く browser = Browser('firefox') url = 'https://store.line.me/home/' browser.visit(url) # ログインする login_submit = browser.find_link_by_partial_href('login') if login_submit: login_submit.click() else: html_code = browser.html return { 'code': 4, 'message': "サイト上に問題が発生しました。(サイトがアクセスできない、またはネットが遅すぎる可能性があります。)", 'htmlcode': html_code } username_input_field = browser.find_by_id('id') password_input_field = browser.find_by_id('passwd') login_submit = browser.find_by_value('Login') if username_input_field and password_input_field and login_submit: username_input_field.fill(user_name) password_input_field.fill(password) login_submit.click() else: html_code = browser.html return { 'code': 4, 'message': "サイト上に問題が発生しました。(サイトがアクセスできない、またはネットが遅すぎる可能性があります。)", 'htmlcode': html_code } # ログイン画像認識があるかどうかチェックする #captcha_image_field = browser.find_by_css('img.FnCaptchaImg') #メールアドレスまたパスワードをチェックする login_alert_field = browser.find_by_css('p.mdMN02Txt') if browser.is_element_present_by_css('p.mdMN02Txt'): result = login_alert_field.value if result.find(unicode('The password you have entered is invalid, or you have not registered your email address with LINE.')) != -1: html_code = browser.html return { 'code': 2, 'message': 'メールアドレスまたはパスワードが正しくありません。', 'htmlcode': html_code } # チャージ画面に移動する browser.find_by_text('Charge').click() browser.windows.current = browser.windows[1] browser.find_by_id('70002').click() browser.execute_script("charge(this); return false;") # チャージする code_input_field = browser.find_by_id('FnSerialNumber') code_input_field.fill(code) time.sleep(9000) browser.execute_script("javascript:doCharge(this);return false;") result = browser.find_by_css('p.mdLYR11Txt01').value browser.quit() return result
def getLessons(email, password, fullCourseName):
    #--------------------------------
    # getLessons(email, password, fullCourseName)
    #
    # Logs in to Coursera, opens the dashboard card whose text contains
    # fullCourseName, then visits every week page of that course and
    # collects each lesson's title and URL.
    # Returns (arrLessonTitle, arrLessonURL); each title carries a
    # zero-padded "WW-II-JJ-" (week-lesson-item) prefix for later sorting.
    # Relies on module-level imports: sys, time, Browser.
    #--------------------------------
    firstBrowser = 'phantomjs'
    secondBrowser = 'chrome'
    # Prefer headless PhantomJS; fall back to Chrome, then default (Firefox).
    try:
        browser = Browser(firstBrowser)
    except:
        print "\nYou have not properly installed or configured PhantomJS!\nYou will see an automated browser popping up and crawling,\nwhich you will not see if you have properly installed or configured PhantomJS.\nDo not close that automated browser...\n"
        try:
            browser = Browser(secondBrowser)
            print "Using Chrome Web Driver...\n"
        except:
            browser = Browser()
            print "Using Firefox Web Driver...\n"
    arrLessonURL = []
    arrLessonTitle = []
    browser.driver.maximize_window()
    # Log in through the combined landing/login page.
    browser.visit('https://www.coursera.org/?authMode=login')
    browser.fill('email', email)
    browser.fill('password', password)
    print "Logging in"
    button = browser.find_by_text('Log In')[-1]
    button.click()
    print "Welcome to Coursera"
    # Busy-wait until the dashboard shows the requested course.
    # NOTE(review): no sleep in this loop, so it spins re-reading
    # browser.html at full speed — confirm whether a delay was intended.
    while(fullCourseName not in browser.html):
        sys.stdout.write('waiting for "' + fullCourseName[:48] + '" to appear...\r')
        #sys.stdout.flush()
    courses = browser.find_by_css('div.cozy.card-rich-interaction.c-dashboard-membership')
    sys.stdout.write('\n'+str(len(courses)) + ' lectures available\r\n\n')
    i = 0
    # Click the course card matching fullCourseName; on a stale element,
    # reload and retry once.
    try:
        while(i < len(courses)):
            try:
                # Advance i until the current card's text contains the
                # course name.
                while(fullCourseName not in courses[i].text.encode('utf-8')):
                    pass
                    i += 1
                courses = browser.find_by_css('div.cozy.card-rich-interaction.c-dashboard-membership')[i].find_by_tag('a')[-1].click()
            except:
                # on StaleElementReferenceException
                browser.reload()
                while(fullCourseName not in browser.html):
                    sys.stdout.write('waiting for "' + fullCourseName + '" to appear...\r')
                    #sys.stdout.flush()
                courses = browser.find_by_css('div.cozy.card-rich-interaction.c-dashboard-membership')[i].find_by_tag('a')[-1].click()
    except:
        pass
    welcomepage = browser.url
    # Wait for the course welcome page to finish loading.
    while(fullCourseName not in browser.html):
        sys.stdout.write('waiting for "' + fullCourseName + '" to appear...\r')
        sys.stdout.flush()
    weeks = browser.find_by_css('div.rc-WeekRow')
    sys.stdout.write('\n'+str(len(weeks)) + " weeks lecture\r\n")
    #sys.stdout.flush()
    # Visit each week's page (URL pattern .../welcome -> .../week/N) and
    # scrape its lesson list.
    for k in range(0, len(weeks)):
        url = welcomepage.replace('welcome','week/'+str(k+1))
        print "----------------- WEEK " + str(k+1) + "-----------------\n"
        browser.visit(url)
        time.sleep(5)
        # Deep CSS path to the week's lesson cards — brittle, tied to
        # Coursera's DOM at the time of writing.
        selector = '#rendered-content > div > div.rc-OndemandApp > div.rc-HomeLayout > div.rc-HomeLayoutBody.horizontal-box > div.od-contents > main > div.rc-PeriodPage > div.horizontal-box.wrap > div > section > div.rc-LessonList.card-rich-interaction.od-section > div > div'
        items = browser.find_by_css(selector)
        sys.stdout.write('\n'+str(len(items)) + ' lessons available\r\n\n')
        for i in range (0, len(items)):
            try:
                print k+1, "-", i+1, items[i].find_by_css('h4').text.upper(), "\n"
                h5_items = items[i].find_by_css('h5')
                a_items = items[i].find_by_css('a')
                # Each h5/a pair is one lesson: record its title and URL.
                for j in range(0, len(h5_items)):
                    url = a_items[j]['href']
                    lessonTitle = h5_items[j].text
                    print k+1, "-", i+1, "-", j+1, "-", lessonTitle, "\n", url
                    arrLessonTitle.append(str(k+1).zfill(2) + "-" + str(i+1).zfill(2) + "-" + str(j+1).zfill(2) + "-" +lessonTitle)
                    arrLessonURL.append(url)
                    i += 1
                print "\n"
            except:
                # Retry once after a reload (stale elements / slow render).
                browser.reload()
                time.sleep(5)
                print k+1, "-", i+1, items[i].find_by_css('h4').text.upper(), "\n"
                h5_items = items[i].find_by_css('h5')
                a_items = items[i].find_by_css('a')
                for j in range(0, len(h5_items)):
                    url = a_items[j]['href']
                    lessonTitle = h5_items[j].text
                    print k+1, "-", i+1, "-", j+1, "-", lessonTitle, "\n", url
                    arrLessonTitle.append(str(k+1).zfill(2) + "-" + str(i+1).zfill(2) + "-" + str(j+1).zfill(2) + "-" +lessonTitle)
                    arrLessonURL.append(url)
                print "\n"
        time.sleep(5)
    print len(arrLessonTitle)," lessons", len(arrLessonURL), " urls"
    browser.driver.close()
    return arrLessonTitle, arrLessonURL
def add_album_to_rym(args, config_file):
    # Logs in to rateyourmusic.com with the stored credentials and files a
    # new album (optionally creating the artist first) from the metadata in
    # config_file, then uploads cover art and proposes a genre vote.
    # Relies on module-level: credentials, config, parse_release_date, time.
    br = Browser()
    br.visit('https://rateyourmusic.com/account/login')
    time.sleep(3)
    # Login
    br.fill('username', credentials.username)
    br.fill('password', credentials.password)
    br.find_by_id('login_submit').click()
    time.sleep(5)
    (title, artist, tracklist, release, cover) = config.read_config(config_file)
    # Dead code kept as a string literal (update-album mode, unfinished):
    """
    if args.update_album:
        br.visit(args.rym_album)
    else:
    """
    if args.add_artist:
        # Create the artist entry first, then navigate to its page by name.
        br.visit('https://rateyourmusic.com/artist_add')
        #br.fill('lastname', unicode(artist))
        br.fill('lastname', artist)
        br.fill('comments', args.url)
        br.find_by_id('submitbtn').click()
        time.sleep(3)
        br.find_by_text(artist).click()
    else:
        br.visit(args.rym_profile)
        time.sleep(3)
    # Open the "add release" form for this artist.
    br.click_link_by_partial_href('/releases/ac?artist_id=')
    # Add data
    #br.fill('title', unicode(title))
    br.fill('title', title)
    br.find_by_id('format58').click()
    br.find_by_id('goAdvancedBtn').click()
    # Tracklist goes into the advanced-mode textarea, then back to simple.
    tracks_div = br.find_by_id('tracks_adv')
    tracks_text_area = tracks_div.find_by_id('track_advanced')
    #tracks_text_area.fill(unicode(tracklist))
    tracks_text_area.fill(tracklist)
    br.find_by_id('goSimpleBtn').click()
    br.fill('notes', args.url)
    # Release date selectors (month/day/year dropdowns).
    (year, month, day) = parse_release_date(release)
    release_month_selector = br.find_by_id('month')
    release_month_selector.select(month)
    release_day_selector = br.find_by_id('day')
    release_day_selector.select(day)
    release_year_selector = br.find_by_id('year')
    release_year_selector.select(year)
    br.find_by_id('previewbtn').click()
    br.find_by_id('submitbtn').click()
    # Add cover art
    # Dead debugging code kept as a string literal:
    """
    coverart_img_element = br.find_by_xpath("//img[@class='coverart_img']")
    print(coverart_im_element)
    sys.exit(0)
    """
    br.click_link_by_partial_href('/images/upload?type=l&assoc_id=')
    br.attach_file('upload_file', cover)
    br.fill('source', args.url)
    br.find_by_id('uploadbutton').click()
    time.sleep(5)
    br.click_link_by_partial_href('javascript:setStatus')
    # Vote for genre
    br.click_link_by_partial_href('/release/')
    time.sleep(3)
    br.click_link_by_partial_href('/rgenre/set?')
    prigen_text_area = br.find_by_xpath("//input[@id='prigen']")
    prigen_text_area.fill('vaporwave')
    prigen_vote_button = br.find_by_xpath("//input[@value='+ propose']").first
    prigen_vote_button.click()
    # Done
    br.click_link_by_partial_href('/release/')
    print("Finished")
def login(username, password):
    #--------------------------------
    # login(username, password)
    #
    # Logs in to BRISTARS, opens the Digital Office (DiO) app, then loops
    # over the 'surat_masuk' (incoming letters) and 'disposisi_masuk'
    # (incoming dispositions) inboxes: each pending letter is opened,
    # printed to PDF via the pop-up print window, and dispositioned to the
    # team whose keywords match the letter subject.
    # Relies on module-level helpers/globals: getInbox, getLetter,
    # removeTags, fileCreate, allKeywords, findTeam, TermsMap, TeamMembers,
    # tujuanDisposisi, defaultTeamName, scriptRoot, pdfkit, time.
    #
    # uncomment below only for debugging, do not use at production
    # print username, len(username), password, len(password)
    #
    browser = Browser()
    browser.driver.maximize_window()
    browser.visit('https://bristars.bri.co.id/bristars/user/login')
    button = browser.find_by_xpath("//button")
    time.sleep(1)
    # Dismiss the landing pop-up: the last button on the page is its 'x'.
    button.last.click()
    #
    # why last button to be clicked?
    # because it is the last button which contains 'x'
    # i= 0
    # for abutton in button:
    #     i += 1
    #     print abutton.text, i
    time.sleep(2)
    browser.fill('pernr', username)
    browser.fill('password', password)
    browser.find_by_name("login").first.click()
    time.sleep(3)
    browser.visit('https://bristars.bri.co.id/bristars/menus/childs/MTE%3D')
    time.sleep(1)
    browser.find_by_text(" Digital Office DiO [A]").first.click()
    time.sleep(2)
    # Inbox name -> inbox URL (internal e-office host).
    Inbox = {'surat_masuk':'http://172.18.65.190/eoffice/surat/surat_masuk',
             'disposisi_masuk':'http://172.18.65.190/eoffice/disposisi/disposisi_masuk'}
    for k, v in Inbox.iteritems():
        browser.visit(v)
        time.sleep(1)
        strHTML = browser.html
        global colheaders
        read, unread, colheaders = getInbox(k, strHTML)
        # Count of letters still needing work; drives the loop below.
        if k == 'surat_masuk':
            anyReadUnread = len(read)+len(unread)
        if k == 'disposisi_masuk':
            anyReadUnread = len(unread)
        while (anyReadUnread):
            print "loop continues", k, len(unread)
            #----------------------------------------------------------------
            # check letters that have not been read yet
            #----------------------------------------------------------------
            if len(unread):
                divs = browser.find_by_css('.boxsurat')
                trs = divs.find_by_xpath('//tr[@status-baca="N"]')
                Letters = getLetter(unread)
                strPerihal = Letters[0].split("|")[1]
                strTanggal = Letters[0].split("|")[2]
            #----------------------------------------------------------------
            # check incoming letters already read but not yet dispositioned
            #----------------------------------------------------------------
            if len(read) and k == 'surat_masuk':
                divs = browser.find_by_css('.boxsurat')
                trs = divs.find_by_xpath('//tr[@status-baca="Y"]')
                Letters = getLetter(read)
                strPerihal = Letters[0].split("|")[1]
                strTanggal = Letters[0].split("|")[2]
            print strPerihal
            # Open the first pending letter.
            trs.first.click()
            browser.driver.execute_script("window.scrollTo(0, 0)")
            time.sleep(5)
            browser.find_by_text("LIHAT INFORMASI SURAT").first.click()
            time.sleep(1)
            #----------------------------------------------------------------
            # clicking available button for demonstration purposes
            #----------------------------------------------------------------
            # button = browser.find_by_id('lihat')
            # print button.text
            # time.sleep(3)
            # button.click()
            # button = browser.find_by_id('sembunyi')
            # print button.text
            # time.sleep(3)
            # button.click()
            window_before = browser.driver.window_handles[0]
            #----------------------------------------------------------------
            # clicking the print button
            #----------------------------------------------------------------
            divs = browser.find_by_xpath('//div[@class="pull-right"]')
            button = divs.find_by_id('btn_print')
            print button.text
            time.sleep(3)
            divs.first.click()
            time.sleep(3)
            window_after = browser.driver.window_handles[1]
            #----------------------------------------------------------------
            # switch to another window, grab the HTML, remove 'script' tag,
            # dump cleaned HTML to HTML file, convert HTML file to PDF
            #----------------------------------------------------------------
            browser.driver.switch_to_window(window_after)
            strHTML = browser.html
            strHTML = strHTML.encode('ascii', 'ignore').decode('ascii')
            strNamaFile = "result.html"
            strHTML = removeTags(["script"], strHTML)
            fileCreate(strNamaFile, str(strHTML))
            # PDF name: "<date>-<subject>.pdf" under OUTPUT/.
            strNamaFile = strTanggal+"-" + strPerihal + '.pdf'
            pdfkit.from_file(scriptRoot + "result.html", "OUTPUT/"+strNamaFile)
            browser.driver.close()
            browser.driver.switch_to_window(window_before)
            print browser.driver.current_url
            #----------------------------------------------------------------
            # DISPOSITION
            #----------------------------------------------------------------
            button = browser.find_by_xpath('//button[text()="Disposisi"]')
            print button.text
            time.sleep(1)
            button.click()
            # Choose the "banyak" (multiple recipients) mode.
            button = browser.find_by_xpath('//input[@value="banyak"]')
            button.click()
            # Pick the option matching the configured disposition target.
            option = browser.find_by_xpath('//select[@id="banyak"]//option')
            for i in range(0, len(option)):
                if tujuanDisposisi.upper() in option[i].text:
                    idxOption = i
            option[idxOption].click()
            element = browser.find_by_xpath('//input[@id="pilih_banyak"]').click()
            element = browser.find_by_xpath('//input[@class="banyak"]')
            #----------------------------------------------------------------
            # Find the team name to be disposed of the letters
            #----------------------------------------------------------------
            # Match subject words against the keyword map; the last match
            # wins, defaulting to defaultTeamName.
            Keywords = allKeywords(TermsMap)
            Words = strPerihal.upper().split(" ")
            teamName = defaultTeamName
            for keyword in Keywords:
                for word in Words:
                    print word, keyword
                    if word == keyword :
                        teamName = findTeam(keyword, TermsMap)
                        print teamName, word
            #----------------------------------------------------------------
            # clicking the name of each worker
            #----------------------------------------------------------------
            for elem in element:
                for team in TeamMembers[teamName]:
                    if elem["value"] == team:
                        print team, elem["value"], "--> box checked"
                        elem.click()
                        time.sleep(1)
            browser.fill("CATATAN_BANYAK", strPerihal+ " - Disposisi by bot")
            button = browser.find_by_xpath('//button[@id="btn_proses"]')
            print button.text
            time.sleep(1)
            button.click()
            time.sleep(1)
            #----------------------------------------------------------------
            # LOOP ENDS HERE
            # back to loop until all letters read and disposed
            #----------------------------------------------------------------
            browser.visit(v)
            time.sleep(4)
            strHTML = browser.html
            read, unread, colheaders = getInbox(k, strHTML)
            if k == 'surat_masuk':
                anyReadUnread = len(read)+len(unread)
            if k == 'disposisi_masuk':
                anyReadUnread = len(unread)
    browser.driver.close()
# Log in to the ETQ Reliance production instance and navigate
# Reporting -> New Document -> AIDMOR.
# SECURITY NOTE(review): the username and password are hard-coded in
# plain text (and committed to source control) — move them to a config
# file or environment variables, and rotate this credential.
from splinter import Browser
browser = Browser()
browser.visit('https://airmethods.etq.com/reliance_prod/reliance')
browser.fill('ETQ$LOGIN_USERNAME','jbean')
browser.fill('ETQ$LOGIN_PASSWORD','Sh3 is mine')
button = browser.find_by_name('login')
button.click()
# Walk the menu by visible link text.
li = browser.find_by_text("Reporting")
li.click()
li = browser.find_by_text("New Document")
li.click()
li = browser.find_by_text("AIDMOR")
li.click()
# python ->for_JD # coding=UTF-8 from splinter import Browser import time url = "http://club.jd.com/myJdcomments/myJdcomments.action" brow = Browser() brow.visit(url) time.sleep(10) num = raw_input("Numbers:") time.sleep(5) for i in range(int(num)): brow.find_by_text("点击评价").click() brow.find_by_text(u"商品是否给力").click() # brow.fill('感觉还不错') # brow.find_by_text("评分").mouse_over() time.sleep(30) brow.find_by_text("发表评价").click() time.sleep(5)
class BaseWebTestCase(LiveServerTestCase):
    """Shared scaffolding for splinter-driven browser tests.

    Provides a Chrome session, login/navigation helpers, and the
    phantomjs confirm-dialog workaround.
    """

    username = '******'
    password = '******'
    wait_seconds = 3.0

    def setUp(self):
        """Start a Chrome-backed splinter browser with generous timeouts."""
        options = webdriver.ChromeOptions()
        for flag in ("--window-size=1920,1080",
                     "--start-maximized",
                     "--no-sandbox"):
            options.add_argument(flag)
        self.browser = Browser('chrome', headless=False, wait_time=30,
                               options=options)
        self.browser.driver.set_page_load_timeout(240)
        super(BaseWebTestCase, self).setUp()

    def tearDown(self):
        """Shut the browser down before the normal teardown runs."""
        self.browser.quit()
        super(BaseWebTestCase, self).tearDown()

    def _post_teardown(self):
        """Run the framework post-teardown, logging (not raising) failures."""
        try:
            super(BaseWebTestCase, self)._post_teardown()
        except Exception:
            import traceback
            traceback.print_exc()
            print("Ignoring exception in post-teardown")

    def _wait(self):
        """Block for the class-wide settle delay."""
        time.sleep(self.wait_seconds)

    def _login(self):
        """Sign in through the login form and sanity-check the session."""
        self._visit("")
        self.browser.fill('username', self.username)
        self.browser.fill('password', self.password)
        self.browser.find_by_text('Sign in').first.click()
        # The nav (with "Admin") only renders for authenticated users.
        assert self.browser.is_text_present('Admin')
        assert not self.browser.is_text_present('Sign in')

    def _go_home(self):
        """Navigate to the site root."""
        self._visit("/")

    def _setup_confirm(self):
        """
        First part of work-around to let phantomjs accept confirmation dialogs
        http://stackoverflow.com/questions/19903146/confirm-alert-window-in-phantom-js
        """
        self.browser.execute_script(
            'window.confirm = function() { return true }')

    def _accept_confirm(self):
        """
        Second part of work-around to let phantomjs accept confirmation dialogs
        MUST call self._setup_confirm() for this to work
        """
        self.browser.execute_script('return window.confirm')

    def _visit(self, path):
        """Open live_server_url + path, then wait for the page to settle."""
        self.browser.visit(self.live_server_url + path)
        self._wait()
# Scrape tender notices from an e-procurement portal.  Module-level
# names `portal`, `get_links`, `get_tender_soup`, `get_details`,
# BeautifulSoup and time are defined elsewhere in the file.
# NOTE(review): get_links() is only called inside the pagination loop, so
# links on the FIRST results page are never collected — confirm intent.
# NOTE(review): the final for-loop body looks truncated in this chunk —
# `item` is initialized but never used here.
browser = Browser("phantomjs", service_args=["--ignore-ssl-errors=true", "--ssl-protocol=any"])
browser.visit(portal)
browser.select("searchSelect", "1")
browser.find_by_value("Search").first.click()
time.sleep(3)
print portal
base_url = portal[: portal.find("/epps")]
html = browser.html
soup = BeautifulSoup(html, "lxml")
# Total page count from the pagination widget (not used below).
pages = (soup.find("div", {"class": "Pagination"}).find("p", {"class": "PageNav"}).findAll("strong"))[1].text
links = []
# Keep clicking the "next" arrow while the portal still offers one,
# collecting detail links from each newly loaded page.
while soup.find("button", onclick="goPage2('next')"):
    browser.find_by_text("›").first.click()
    time.sleep(3)
    html = browser.html
    soup = BeautifulSoup(html, "lxml")
    links.extend(get_links(soup, base_url))
for link in links:
    print link
    tender_soup = get_tender_soup(link)
    resource_id = link[link.find("resourceId") :]
    tender_url = link
    details = get_details(tender_soup)
    item = []
class SplinterBrowserDriver(BaseBrowserDriver):
    """
    This is a BrowserDriver for splinter (http://splinter.cobrateam.info)
    that implements the BaseBrowserDriver API.

    To use it, you must have splinter installed on your env.

    For itself it's a browser driver that supports multiple browsing
    technologies such as selenium, phantomjs, zope, etc.
    """

    driver_name = 'splinter'

    def __init__(self, *args, **kwargs):
        # Default to the configured browser when no positional args given.
        _args = args or (config.default_browser, )
        super(SplinterBrowserDriver, self).__init__()
        if not splinter_available:
            raise ImportError(
                "In order to use splinter Base Driver you have to install it. "
                "Check the instructions at http://splinter.cobrateam.info")
        self._browser = Browser(*_args, **kwargs)

    def _handle_empty_element_action(self, element):
        # Guard used by the @element_action wrappers: fail loudly when a
        # selector matched nothing instead of raising an obscure error.
        if not element:
            raise ActionNotPerformableException(
                "The action couldn't be perfomed because the element couldn't "
                "be found; Try checking if your element"
                "selector is correct and if the page is loaded properly.")

    @property
    def page_url(self):
        # Current page URL.
        return self._browser.url

    @property
    def page_source(self):
        # Raw HTML of the current page.
        return self._browser.html

    @property
    def page_title(self):
        # Title of the current page.
        return self._browser.title

    def open_url(self, url):
        # Navigate via the underlying selenium driver directly.
        self._browser.driver.get(url)

    def close(self):
        # Close the current window only; the session stays alive.
        return self._browser.driver.close()

    def quit(self):
        # End the whole browser session.
        return self._browser.quit()

    def is_element_visible(self, element):
        return element.visible

    def get_element_text(self, element):
        return element.text

    # --- element lookups: each returns splinter's element list ---

    def get_element_by_xpath(self, selector):
        return self._browser.find_by_xpath(selector)

    def get_element_by_css(self, selector):
        return self._browser.find_by_css(selector)

    def get_element_by_id(self, selector):
        return self._browser.find_by_id(selector)

    def get_element_by_tag(self, selector):
        return self._browser.find_by_tag(selector)

    def get_element_by_text(self, selector):
        return self._browser.find_by_text(selector)

    # --- element actions: @element_action validates the element first ---

    @element_action
    def type(self, element, text, slowly=False):
        return element.type(text, slowly)

    @element_action
    def fill(self, element, text):
        return element.fill(text)

    @element_action
    def clear(self, element):
        # Clearing is just filling with the empty string.
        self.fill(element, '')

    @element_action
    def click(self, element):
        return element.click()

    @element_action
    def choose(self, element, value):
        return element.choose(value)

    @element_action
    def select(self, element, value):
        return element.select(value)

    @element_action
    def select_by_text(self, element, text):
        # Select an <option> by its visible text rather than its value.
        return element.find_by_xpath(
            'option[normalize-space(.)="%s"]' % text).first._element.click()

    @element_action
    def check(self, element):
        return element.check()

    @element_action
    def uncheck(self, element):
        return element.uncheck()

    @element_action
    def mouse_over(self, element):
        return element.mouse_over()

    @element_action
    def mouse_out(self, element):
        return element.mouse_out()

    def reload(self):
        return self._browser.reload()

    def go_back(self):
        return self._browser.back()

    def go_forward(self):
        return self._browser.forward()

    def execute_script(self, script):
        """This method is deprecated. Use `execute_javascript` instead.
        """
        return self._browser.evaluate_script(script)

    def execute_javascript(self, script):
        # Evaluate the script in the page and return its value.
        return self._browser.evaluate_script(script)

    def get_iframe(self, iframe_id):
        return self._browser.get_iframe(iframe_id)

    def get_alert(self):
        return self._browser.get_alert()

    def attach_file(self, input_name, file_path):
        return self._browser.attach_file(input_name, file_path)

    def wait_pageload(self, timeout=30):
        # Poll document.readyState every 50 ms until the page reports
        # 'complete'; raise PageNotLoadedException after `timeout` seconds.
        wait_interval = 0.05
        elapsed = 0
        while self.execute_javascript('document.readyState') != 'complete':
            self.wait(wait_interval)
            elapsed += wait_interval
            if elapsed > timeout:
                raise PageNotLoadedException

    def click_and_wait(self, element, timeout=30):
        # Convenience: click, then block until the next page has loaded.
        self.click(element)
        self.wait_pageload(timeout)

    def clear_session(self):
        # Drop all cookies so the next request starts unauthenticated.
        self._browser.driver.delete_all_cookies()
# Log in to StudentWeb (fsweb.no) with credentials from config.ini and
# scrape the results (grades) table.
# NOTE(review): this chunk ends mid-statement (`if grade != 'passed':`
# has no body here) — the rest of the script is outside this view.
from ConfigParser import SafeConfigParser
from splinter import Browser

parser = SafeConfigParser()
parser.read('config.ini')
browser = Browser(parser.get('Config', 'Browser'))
browser.driver.maximize_window()
browser.visit('https://fsweb.no/studentweb/login.jsf?inst=' + parser.get('Config', 'Institution'))
browser.find_by_text('Norwegian ID number and PIN').first.click()
browser.find_by_id('login-box')
# JSF-generated field ids — brittle, tied to the current page build.
browser.fill('j_idt129:j_idt131:fodselsnummer', parser.get('Config', 'Fodselsnummer'))
browser.fill('j_idt129:j_idt131:pincode', parser.get('Config', 'Pin'))
browser.find_by_text('Log in').first.click()
browser.click_link_by_href('/studentweb/resultater.jsf')
tags = browser.find_by_tag('tr')
chars = []
for tag in tags:
    # Result rows carry either class 'resultatTop' or 'none'.
    if tag.has_class('resultatTop') or tag.has_class('none'):
        inner_tags = tag.find_by_tag('td')
        # Cell 1 holds "<course id>\n<course name>"; cell 5 the grade.
        course_id = inner_tags[1].text.split("\n")[0]
        course_name = inner_tags[1].text.split("\n")[1]
        grade = inner_tags[5].text
        if grade != 'passed':
def fetchVideo(arrLessonTitle, arrLessonURL):
    # Logs back in to Coursera and, for every lesson URL collected by
    # getLessons, downloads the lecture resources via getFile().
    # Relies on module-level globals: email, password, fullCourseName,
    # precheck, cleanTitle, getFile, sys, time, Browser.
    firstBrowser = 'phantomjs'
    secondBrowser = 'chrome'
    # Prefer headless PhantomJS; fall back to Chrome, then default (Firefox).
    try:
        browser = Browser(firstBrowser)
    except:
        print "\nYou have not properly installed or configured PhantomJS!\nYou will see an automated browser popping up and crawling,\nwhich you will not see if you have properly installed or configured PhantomJS.\nDo not close that automated browser...\n"
        try:
            browser = Browser(secondBrowser)
            print "Using Chrome Web Driver...\n"
        except:
            browser = Browser()
            print "Using Firefox Web Driver...\n"
    # Kept for reference: experiment with disabling images in Chrome.
    # if firstBrowser == 'chrome':
    #     if disableImage == True:
    #         browser.driver.close()
    #         options = webdriver.ChromeOptions()
    #         options.add_experimental_option("excludeSwitches", ["ignore-certificate-errors"])
    #         options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images":2})
    #         browser.driver = webdriver.Chrome(chrome_options=options)
    browser.driver.maximize_window()
    browser.visit('https://www.coursera.org/?authMode=login')
    time.sleep(5)
    browser.fill('email', email)
    time.sleep(.5)
    browser.fill('password', password)
    print "Logging (back) in...\n"
    button = browser.find_by_text('Log In')[-1]
    button.click()
    print "Login success...\n\nWelcome to Coursera\n"
    for i in range(0, len(arrLessonURL)):
        browser.visit(arrLessonURL[i])
        # Strip the "WW-LL-II-" index prefix and any "Reading: " label.
        strLessonTitle = arrLessonTitle[i][9:].replace("Reading: ","")
        if not precheck(strLessonTitle, fullCourseName):
            # Wait up to 60 s for the lesson page to render its title.
            waiting_time = 0
            while(strLessonTitle not in browser.html):
                waiting_time = waiting_time + 1
                sys.stdout.write('waiting for "' + strLessonTitle[:48] + '" to appear...' + str(waiting_time) + ' second(s)\r')
                sys.stdout.flush()
                time.sleep(1)
                if waiting_time == 60:
                    print('Time out. Skipped.')
                    break
            try:
                # Deep CSS path to the lecture-resources list — brittle,
                # tied to Coursera's DOM at the time of writing.
                selector = '#rendered-content > div > div.rc-OndemandApp > div.rc-ItemLayout > div:nth-child(3) > div > div > div > div.horizontal-box.week-drawer-container > div.content-container.flex-1 > div.extras.horizontal-box.align-items-top.wrap > div.rc-LectureResources.styleguide > ul > li'
                courses = browser.find_by_css(selector)
                for course in courses:
                    strLessonURL = course.find_by_css('a')['href']
                    print strLessonURL, "\n"
                    fileName = cleanTitle(arrLessonTitle[i])
                    print fileName, "\n"
                    try:
                        # getFile() is retried-until-truthy; the break
                        # exits after the first successful attempt.
                        while(getFile(strLessonURL, fileName, fullCourseName)):
                            break
                    except:
                        print "download failed...\n"
            except:
                print "No video found.\n"
                pass
        else:
            # Already downloaded (precheck hit) — skip.
            pass
    browser.driver.close()
def open_door():
    """Log in to the Chalmers student-housing portal and click every
    configured door-open button, printing how long the run took.

    Depends on module-level ``options`` (username / password / door_ids),
    ``parse_door_ids``, ``time``, ``sleep`` and ``Browser``.
    """
    started = time.time()

    browser = Browser('phantomjs')
    browser.visit("https://www.chalmersstudentbostader.se/login")
    browser.fill('log', options.username)
    browser.fill('pwd', options.password)

    # Poll until the login button has rendered, then press the second match.
    while not browser.find_by_text('Logga in'):
        sleep(0.05)
    browser.find_by_text('Logga in')[1].click()

    # Poll until the "open the gate" link carries a real URL — the
    # placeholder "{aptusUrl}" means the page hasn't resolved it yet.
    while True:
        while not browser.find_by_text('Öppna porten'):
            sleep(0.05)
        port = browser.find_by_text('Öppna porten').first
        if "{aptusUrl}" not in port['href']:
            break
        sleep(0.1)
    browser.visit(port['href'])

    # Press each configured door button as soon as it appears.
    for door_id in parse_door_ids(options.door_ids):
        while not browser.find_by_id(door_id):
            sleep(0.1)
        print("Opening door with id: " + door_id)
        browser.find_by_id(door_id).first.click()

    print(time.time() - started)