def JPL_image():
    """Return the URL of the full-size JPL featured Mars image.

    Drives a Chrome browser through the JPL space-images page: clicks the
    "FULL IMAGE" link, then "more info", and reads the markup of the link
    whose href contains ``largesize``.

    Returns:
        str: ``'https://www.jpl.nasa.gov'`` followed by the text found after
        the last ``=`` in that link's HTML, with surrounding quotes and the
        closing ``>`` stripped.
    """
    # The original called init_browser() first and immediately overwrote the
    # result with the Browser below, leaving an orphaned browser process.
    # Only the browser that is actually used is created here.
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)
    try:
        url_jpl = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
        browser.visit(url_jpl)
        browser.find_link_by_partial_text('FULL IMAGE').click()
        # Return value intentionally ignored; the call is kept from the
        # original, which appears to use it as a short pause while the
        # overlay loads (TODO confirm).
        browser.is_element_not_present_by_id('images', wait_time=2)
        browser.find_link_by_partial_text('more info').click()
        link = browser.find_link_by_partial_href('largesize')
        # Take everything after the last '=' and trim the quote characters
        # and the trailing '>' left over from the tag markup.
        image_url = link.html.split("=")[-1].lstrip('"').rstrip('">')
    finally:
        # Always release the browser, even when a link is missing.
        browser.quit()
    featured_image_url = 'https://www.jpl.nasa.gov' + image_url
    return featured_image_url
def _wait_while(condition, attempts=10, delay=1):
    """Sleep ``delay`` seconds while ``condition()`` is true, at most ``attempts`` times."""
    import time

    count = 0
    while condition() and count < attempts:
        time.sleep(delay)
        count += 1


def get_token(username, password):
    """Log in to the UQ timetable planner and return its ``remember_token`` cookie.

    Each step (sign-in link, credential form, LOGIN button, cookie) is polled
    for up to ten one-second attempts before giving up.

    Args:
        username: Value typed into the ``username`` field.
        password: Value typed into the ``password`` field.

    Returns:
        The value of the ``remember_token`` cookie, or ``None`` if any step
        did not become available in time.
    """
    from splinter import Browser

    sign_in_text = "Sign in and get started!"
    browser = Browser()
    try:
        browser.visit("https://timetableplanner.app.uq.edu.au/")

        # Step 1: open the sign-in page.
        _wait_while(lambda: browser.is_element_not_present_by_text(sign_in_text))
        if not browser.is_element_present_by_text(sign_in_text):
            return None
        browser.find_by_text(sign_in_text).click()

        # Step 2: fill in the credential form.
        _wait_while(lambda: browser.is_element_not_present_by_id("username"))
        if not (browser.is_element_present_by_id("username")
                and browser.is_element_present_by_id("password")):
            return None
        browser.fill('username', username)
        browser.fill('password', password)

        # Step 3: submit.
        _wait_while(lambda: browser.is_element_not_present_by_value("LOGIN"))
        if not browser.is_element_present_by_value("LOGIN"):
            return None
        browser.find_by_value("LOGIN").click()

        # Step 4: wait for the session cookie to appear.
        _wait_while(lambda: "remember_token" not in browser.cookies)
        if "remember_token" in browser.cookies:
            return browser.cookies['remember_token']
        return None
    finally:
        # Best-effort cleanup: a failure to close must not mask the result.
        try:
            browser.quit()
        except Exception:
            print("Unable to close browser. Do it yourself!")
class DownPatent(object):
    """Looks up a patent on a query page, solves its captcha and returns the download link.

    NOTE(review): the source was collapsed onto a few lines; the nesting below
    is reconstructed from the statement order and should be confirmed against
    the original file.
    """

    def __init__(self, db, down_url):
        """Store the database handle and query URL and start a PhantomJS browser."""
        self.db = db
        self.down_url = down_url
        self.browser = Browser("phantomjs", wait_time=10)
        #self.browser = Browser()

    # Download a patent
    def download(self, patentno):
        """Return the href of the patent download link, or "" if none was found.

        Fills ``patentno`` into the query form, then makes up to 20 attempts at
        reading the captcha from a screenshot. Every attempt is recorded in
        ``self.db.onlinecode``.
        """
        # Visit the page
        # Page load timeout
        # down_flag: 0 = not downloaded, 1 = patent does not exist, 2 = download failed
        # (3 is also assigned below when a link is found; the flag is never
        # read or returned by this method.)
        download_link = ""
        down_flag = 0
        if True:
            print "打开网页"
            self.browser.visit(self.down_url)
            if not self.browser.is_element_not_present_by_value("查询", wait_time=10):
                # Fill in the patent number
                self.browser.fill("cnpatentno", patentno)
                self.browser.find_by_value("查询").first.click()
                print "填写专利号"
                # Connection timeout, 404
                if self.browser:
                    print "打开验证码网页"
                    # Loop at most 20 times
                    code_handler = CodeHandler()
                    # Captcha text that was filled in
                    list_fill_text = []
                    # Captcha image paths
                    list_code_path = []
                    # Captcha segmentation flags
                    list_split_flag = []
                    # Captcha recognition flags
                    list_reg_flag = []
                    for code_num in xrange(20):
                        print code_num
                        # Look for the captcha
                        if not self.browser.is_element_not_present_by_id("getcode", wait_time=5):
                            print "查找验证码"
                            # Take a screenshot
                            #self.browser.driver.maximize_window()
                            self.browser.driver.save_screenshot("screenshot.png")
                            # Cut the captcha image out of the screenshot
                            # (a fixed 52x21 box starting at the located corner)
                            image = Image.open("screenshot.png")
                            image_location = self.find_location(image)
                            image_code = image.crop((image_location[0], image_location[1], image_location[0]+52, image_location[1]+21))
                            save_path = "static/images/onlinecode/" + time.ctime() + ".png"
                            save_path_temp = "../%s" % save_path
                            image_code.save(save_path_temp)
                            list_code_path.append(save_path)
                            # Split the image into characters
                            list_split_image = self.deal_split(code_handler, image_code)
                            # Recognise only when the split yields 4 pieces; otherwise fetch a new captcha
                            if len(list_split_image) == 4:
                                print "正确分割"
                                list_split_flag.append(1)
                                reg_plain_text = self.reg_code(list_split_image)
                                fill_text = "".join(reg_plain_text)
                                list_fill_text.append(fill_text)
                                # Fill in the captcha
                                #hand_fill_text = raw_input("Enter fill text:")
                                self.browser.fill("ValidCode", fill_text)
                                self.browser.find_by_value("确定").first.click()
                                print self.browser.html.encode("utf-8").find("验证码输入错误")
                                # -1 means the "captcha wrong" message is absent from the page
                                if self.browser.html.encode("utf-8").find("验证码输入错误") == -1:
                                    list_reg_flag.append(1)
                                    # -1 means the "patent not found" message is absent from the page
                                    if self.browser.html.encode("utf-8").find("没有找到该专利") == -1:
                                        down_link_one = self.browser.find_link_by_text("申请公开说明书图形下载(标准版)")
                                        down_link_two = self.browser.find_link_by_text("申请公开说明书图形下载(极速版)")
                                        if down_link_one or down_link_two:
                                            print "查找说明书图形下载链接"
                                            list_reg_flag.append(1)
                                            if down_link_one:
                                                self.browser.click_link_by_text("申请公开说明书图形下载(标准版)")
                                            else:
                                                self.browser.click_link_by_text("申请公开说明书图形下载(极速版)")
                                            print "查找下载链接"
                                            # Look for the download link
                                            download_a = self.browser.find_link_by_text("下载专利")
                                            if download_a:
                                                download_link = download_a["href"]
                                                # Download link found
                                                down_flag = 3
                                                break
                                            else:
                                                print "下载失败"
                                                # Download failed
                                                down_flag = 2
                                                break
                                        # The string below is a disabled branch kept as a no-op expression.
                                        ''' else: print "识别正确,未找到链接" list_reg_flag.append(0) self.browser.back() self.browser.reload() '''
                                    else:
                                        print "不存在专利"
                                        # Patent does not exist
                                        down_flag = 1
                                        break
                                else:
                                    print "识别错误,重新加载"
                                    list_reg_flag.append(0)
                                    self.browser.back()
                                    self.browser.reload()
                            else:
                                print "不能分割"
                                list_fill_text.append("")
                                list_split_flag.append(0)
                                list_reg_flag.append(0)
                                self.browser.reload()
                    # Store into the onlinecode collection: patent number, captcha path,
                    # recognised text, recognition flag, split flag, time
                    for code_path, fill_text, split_flag, reg_flag in zip(list_code_path,list_fill_text, list_split_flag, list_reg_flag):
                        try:
                            self.db.onlinecode.insert({"indexflag": patentno, "codepath": code_path, "filltext": fill_text, \
                                                       "splitflag": split_flag, "regflag": reg_flag, "time": time.ctime()})
                        except:
                            # Insert failures of any kind are ignored.
                            pass
        return download_link

    # Process the captcha
    def deal_split(self, code_handler, image):
        """Return whatever ``code_handler.main_deal_split(image)`` yields for the captcha image."""
        list_split_image = code_handler.main_deal_split(image)
        return list_split_image

    # Recognition
    def reg_code(self, list_split_image):
        """Return a list of recognised hex characters for the split captcha images.

        Each image is flattened row by row into 0 (truthy pixel) / 1 (zero
        pixel) values and passed to ``NeuralWork().reg_net``. An output is
        mapped to a character only when its rounded value is below 16, so the
        result may be shorter than the input.
        """
        all_plain_text = "0123456789abcdef"
        reg_plain_text = []
        neural = NeuralWork()
        list_input_data = []
        for each_split_image in list_split_image:
            each_input_data = []
            # x walks size[1] and y walks size[0]; getpixel takes (y, x)
            for x in xrange(each_split_image.size[1]):
                for y in xrange(each_split_image.size[0]):
                    if each_split_image.getpixel((y, x)):
                        each_input_data.append(0)
                    else:
                        each_input_data.append(1)
            list_input_data.append(each_input_data)
        out = neural.reg_net(list_input_data)
        for each in out:
            # First output scaled by 100 is used as an index into all_plain_text
            plain_text = int(round(each[0] * 100))
            if plain_text < 16:
                reg_plain_text.append(all_plain_text[plain_text])
        return reg_plain_text

    # Find the position of the captcha image
    def find_location(self, image):
        """Return ``[column, row]`` of the first non-zero pixel of the grayscale image.

        Pixels are scanned column by column; ``[0, 0]`` is returned when every
        pixel is zero.
        """
        image = image.convert("L")
        image_width = image.size[0]
        image_height = image.size[1]
        # flag stays equal to image_width until a non-zero pixel is seen
        flag = image_width
        location = [0, 0]
        for y in xrange(image_width):
            for x in xrange(image_height):
                if image.getpixel((y, x)) != 0:
                    flag = y
                    break
            if flag != image_width:
                location[0] = y
                location[1] = x
                break
        return location
class Session:
    """A logged-in browser session on jizdenky.studentagency.cz.

    Wraps a splinter ``Browser`` and exposes the steps of a reservation:
    searching connections, picking a departure, picking a seat and confirming.
    """

    def __init__(self, browser, user):
        """Open the site and log in.

        Args:
            browser: Driver name passed to splinter's ``Browser``.
            user: Mapping with at least ``login`` and ``password``;
                ``order_seat`` additionally reads ``first``, ``last``,
                ``email`` and ``phone``.
        """
        self.browser = Browser(browser)
        self.browser.visit('http://jizdenky.studentagency.cz/')
        self.browser.fill_form({'passwordAccountCode': user['login'],
                                'password': user['password']})
        self.browser.execute_script('window.scrollTo(0, 100)')
        button = self.browser.find_by_value('Přihlásit').first
        button.click()
        self.user = user
        self.log = logging.getLogger(__name__)

    def go_search(self):
        """Navigate back to the search (home) page."""
        self.browser.visit('http://jizdenky.studentagency.cz/')

    def search(self, task, date_return=None, is_open=False):
        """Fill in the search form and return connections for ``task.date``.

        Args:
            task: Object providing ``from_city``, ``to_city`` and ``date``.
            date_return: Optional return date; selects the return-ticket form.
            is_open: If true, tick the open-return checkbox.

        Returns:
            list: ``Connection`` objects built from the ``routeSummary``
            elements listed under the heading whose date equals ``task.date``.

        Raises:
            AssertionError: If a route summary appears before any date heading.
        """
        # Second radio button when any kind of return ticket is requested.
        radio_index = 1 if date_return or is_open else 0
        self.browser.find_by_id('hp_form_itinerar').first \
            .find_by_xpath('div/input[@type="radio"]')[radio_index].check()
        for city, i in [(task.from_city, 1), (task.to_city, 2)]:
            self.browser.find_by_css('input[tabindex="{}"]'.format(i)) \
                .first.fill(city)
            # Pick the autocomplete suggestion that matches the city name.
            for item in self.browser.find_by_css('.ui-menu-item'):
                link = item.find_by_tag('a')
                if link.value.lower() == city.lower():
                    link.click()
                    break
        self.browser.fill('departure:dateField', task.date)
        if date_return:
            self.browser.fill('returnDeparture:dateField', date_return)
        if is_open:
            self.browser.check('returnTicketOpen')
        self.browser.find_option_by_text('ISIC').first.check()
        self.browser.find_by_value('Vyhledat').first.click()
        # Each check waits up to a second; loop until the results appear.
        while self.browser.is_element_not_present_by_css('.left_column',
                                                         wait_time=1):
            pass
        items = self.browser.find_by_css('.left_column') \
            .find_by_xpath('div/div/*')
        connections = []
        # Initialised so the assert below fails cleanly instead of raising
        # UnboundLocalError when no <h2> date heading precedes a route.
        date_local = None
        for item in items:
            if item.tag_name == 'h2':
                date_local = item.text.split(' ')[1]
            elif item.tag_name == 'div' and item.has_class('routeSummary'):
                assert date_local
                if date_local != task.date:
                    break
                connections.append(Connection(item))
        return connections

    def order_time(self, connection):
        """Select ``connection`` and return its free seats.

        Keeps clicking the connection (dismissing any wicket dialog) until the
        summary element appears, confirms it, then collects seat elements.

        Returns:
            dict: Seat number (int) mapped to the seat element.
        """
        # NOTE(review): source indentation was lost; the dialog handling is
        # assumed to belong to the successful-click branch — confirm.
        while True:
            if connection.click():
                dialog = self.browser.find_by_css('[id^=_wicket_window]')
                if dialog:
                    dialog.first.find_by_tag('button').click()
                if self.browser.is_element_present_by_id('sumary_lines',
                                                         wait_time=1):
                    break
        self.browser.find_by_id('sumary_lines') \
            .first.find_by_tag('button') \
            .first.click()
        seats = {}
        bus = self.browser.find_by_css('.seatsContainer')
        if bus:
            for seat in bus.first.find_by_css(
                    '.seatContainer:not([style*=blocked])'):
                # Seat label is the inner div's HTML minus its last character.
                seats[int(seat.find_by_tag('div').first.html[:-1])] = seat
        else:
            bus = self.browser.find_by_css('.vehicle')
            for seat in bus.first.find_by_css('.free, .selected'):
                seats[int(seat.text[:-1])] = seat
        return seats

    def order_seat(self, seat):
        """Reserve ``seat`` and, if configured, e-mail the ticket page.

        Declines insurance, fills in passenger details when the site asks for
        them, accepts the terms and submits. When ``conf.yaml`` contains an
        ``email`` section, the ticket page HTML is mailed to the user.

        Raises:
            AssertionError: If the submit button does not read 'Rezervovat'
                after the passenger details were entered.
        """
        if not seat.has_class('selected'):
            seat.click()
        # Decline every insurance package offered.
        for fs in self.browser.find_by_css('fieldset.topRoute'):
            legend = fs.find_by_css('legend')
            if legend and 'Pojištění' in legend[0].text:
                for package in fs.find_by_css('.insurancePackageType'):
                    if 'nechci' in package.find_by_tag('label').text:
                        package.find_by_tag('input').click()
                        time.sleep(1)
        submit = self.browser.find_by_css('[name^=buttonContainer]').first
        interaction_type = submit.text
        reserved = 'Rezervovat' in interaction_type
        if not reserved:
            # An intermediate step asks for passenger details first.
            submit.click()
            time.sleep(1)
            data = (self.user['first'], self.user['last'],
                    self.user['email'], self.user['phone'])
            inputs = self.browser.find_by_id('passengerInfo') \
                .first.find_by_tag('input')
            for item, value in zip(inputs, data):
                item.fill(value)
            submit = self.browser.find_by_css('[name^=buttonContainer]').first
            interaction_type = submit.text
            assert 'Rezervovat' in interaction_type
        agreement = self.browser.find_by_css(
            '[name="bottomComponent:termsAgreementCont:termsAgreementCB"]')
        if agreement:
            agreement[0].check()
        time.sleep(1)
        submit.click()
        with open('conf.yaml') as f:
            # safe_load: plain yaml.load() without a Loader can construct
            # arbitrary objects and is rejected by current PyYAML releases.
            conf = yaml.safe_load(f)
        if 'email' in conf:
            email = conf['email']
            while self.browser.is_element_not_present_by_id('ticketPage',
                                                            wait_time=1):
                pass
            msg = MIMEText(self.browser.find_by_id('ticketPage').first.html,
                           'html')
            msg['Subject'] = 'SA reservation'
            msg['From'] = email['from']
            msg['To'] = self.user['email']
            username = email['username']
            password = email['password']
            server = smtplib.SMTP(email['server'])
            try:
                server.starttls()
                # The configured password is stored base64-encoded.
                server.login(username, b64decode(password).decode())
                server.sendmail(msg['From'], msg['To'], msg.as_string())
            finally:
                # Close the connection even when login or sending fails.
                server.quit()
# -*-coding:utf-8-*- #! usr/bin/python #小学期截课脚本 __author__='Zheng Wu' from time import sleep from splinter import Browser browser=Browser('chrome') url='http://electsys.sjtu.edu.cn/edu/login.aspx' browser.visit(url) sleep(30) browser.visit('http://electsys.sjtu.edu.cn/edu/student/elect/warning.aspx?xklc=2&lb=3') button=browser.find_by_id('CheckBox1') if (browser.is_element_not_present_by_id('CheckBox1')): pass else: button.click() browser.find_by_id('btnContinue').click() while 1: browser.find_by_value('AD001').click() #value为对应的课程代码 browser.find_by_id('lessonArrange').click() browser.find_by_name('myradiogroup').click() browser.find_by_id('LessonTime1_btnChoose').click() if browser.is_element_not_present_by_id('Button1'): browser.find_by_id('btnSubmit').click() print 'successfully get class!' browser.quit() else:
def test_do_login(self):
    """Log in to the NPSP Salesforce org and wait for two-factor verification.

    Locates the username, password and submit elements, checks that they are
    visible, submits the test credentials, then waits for the two-factor
    element. Entering the SMS code is a manual step.

    Raises:
        Opportunity_Exception: If a login element is not visible, or the page
            reports that the two-factor code cannot be sent.
    """
    chrome_options = webdriver.ChromeOptions()
    browser = Browser('chrome', options=chrome_options)
    with browser:
        browser.visit(CONST.NPSPSALESFORCE)

        # Find the page elements needed
        username_input_list = browser.find_by_id(
            CONST.LOGIN_USERNAME_INPUT_ID)
        password_input_list = browser.find_by_id(
            CONST.LOGIN_PASSWORD_INPUT_ID)
        submit_login_list = browser.find_by_id(CONST.LOGIN_SUBMIT_LOGIN_ID)

        # Get the single item from the list returned by 'find_by'
        username_input = Utilities.verify_one_item_list(
            username_input_list)
        password_input = Utilities.verify_one_item_list(
            password_input_list)
        submit_input = Utilities.verify_one_item_list(submit_login_list)

        # Verify these elements are visible and enabled
        if not username_input.visible:
            raise Opportunity_Exception(
                "username_input field found but not visible")
        if not password_input.visible:
            raise Opportunity_Exception(
                "password_input field found but not visible")
        if not submit_input.visible:
            raise Opportunity_Exception(
                "submit_input field found but not visible")

        # Ready to proceed, enter credentials and submit
        username_input.fill(CONST.LOGIN_TEST_USER)
        password_input.fill(CONST.LOGIN_GOOD_PASSWORD)
        submit_input.click()

        # NOTE: Should have a wait for the two factor authentication
        # NOTE: Problem Verifying can occur at this time preventing login
        #       therefore test fails with exception
        while browser.is_element_not_present_by_id(CONST.TWO_FACTOR_ID, 60):
            # Fixed: the original passed `browser` as the text to search for
            # and the message constant as wait_time, so the failure message
            # was never actually looked up.
            if browser.is_text_present(CONST.TWO_FACTOR_CANNOT_SEND):
                raise Opportunity_Exception(
                    "Two factor authentication mechanism cannot send code at this time; System Failure"
                )
        print("DEBUG: Found CONST.TWO_FACTOR_ID {0}".format(
            CONST.TWO_FACTOR_ID))

        # NOTE: MANUAL operation to enter a code sent via SMS occurs at this point.
        #   -- enter the code
        #   -- click the verify action element/button
        # TODO: Investigate best practice at Salesforce for test execution with two factor

        # Need to wait for the Two Factor Auth process to complete and the Home page loads
        # TODO: Ask about reliability of the selector as a repeatable locator; seemed to stop working, using text
        # TODO: uncomment
        # while browser.is_text_present(CONST.HOME_EXPECTED_WELCOME_TEXT, 60):
        #     pass
        print("DEBUG: Found CONST.TWO_FACTOR_ID {0}".format(
            CONST.TWO_FACTOR_ID))

        # TODO: Handle pop-up about Notifications Block/Allow that can occur at this point
        # NOTE: Must Verify that the npsp home page has displayed (more than above wait for it)
        # TODO: this SELECTOR based find stopped working
        # home_page_welcome_list = browser.find_by_css(CONST.HOME_PAGE_WELCOME_SELECTOR)
        # home_page_welcome_text = Utilities.verify_one_item_list(home_page_welcome_list)
        # self.assertEqual(CONST.HOME_EXPECTED_WELCOME_TEXT, home_page_welcome_text.value,
        #                  "Not expected welcome text")
        # NOTE(review): the result of this check is discarded, so the test
        # passes whether or not the welcome text appears.
        browser.is_text_present(CONST.HOME_EXPECTED_WELCOME_TEXT, 5)
class WorkerThread(QThread):
    """Background thread that reserves meals for one user on the university food site.

    Progress and errors are reported to the GUI through old-style Qt signals
    (``log``, ``Error``, ``reservation``, ``showQuestion``, ``removeThread``).
    """

    def __init__(self, nameOfList, username, password, weeksToReserve, form_data_version, parent=None):
        """Store the reservation parameters.

        Args:
            nameOfList: File name of the food list inside ``./foodlists/``.
            username: Login name for the reservation site.
            password: Password for the reservation site.
            weeksToReserve: Container checked for ``"this"`` and ``"next"``.
            form_data_version: Version the food list must match.
            parent: Optional Qt parent object.
        """
        QThread.__init__(self, parent)
        self.username = username
        self.password = password
        self.nameOfList = nameOfList
        self.weeks_to_reserve = weeksToReserve
        self.form_data_version = form_data_version
        # Parenthesised so the statement is valid on both Python 2 and 3.
        print(form_data_version)

    def stop_thread(self):
        """Ask the thread's event loop to quit."""
        self.quit()

    def run(self):
        """Log in, reserve the requested weeks, then take and open a screenshot."""
        foods, rests_and_day_numbers_of_week, user_list_version = self.decode_database(self.nameOfList)

        # A food list written for another form version cannot be used.
        if user_list_version != self.form_data_version:
            print(user_list_version)
            self.emit(SIGNAL("showQuestion()"))
            self.emit(SIGNAL("removeThread(QString)"), self.username)
            return self.stop_thread()

        try:
            self.emit(SIGNAL("log(QString)"), self.username + u": رزرو شروع شد")
            self.login(self.username, self.password)
        except AttributeError:
            # An AttributeError during login is reported to the user as a
            # wrong username or password.
            self.emit(
                SIGNAL("Error(QString,QString)"),
                u"خطای ورود",
                u"نام کاربری یا پسورد شما اشتباه است :" + self.username,
            )
            self.emit(SIGNAL("log(QString)"), self.username + ":" + u" رزرو ناموفق- نام کاربری یا پسورد شما اشتباه است")
            self.emit(SIGNAL("removeThread(QString)"), self.username)
            return self.stop_thread()
        except Exception:
            self.emit(SIGNAL("Error(QString,QString)"), u"خطا", u"متاسفانه خطایی رخ داد ")
            self.emit(SIGNAL("removeThread(QString)"), self.username)
            return self.stop_thread()

        this_week_available_days, next_week_available_days = self.collect_available_days()
        if "this" in self.weeks_to_reserve:
            if this_week_available_days:
                self.reserve_for_a_week(
                    rests_and_day_numbers_of_week=rests_and_day_numbers_of_week,
                    available_days=this_week_available_days,
                    foods=foods,
                )
        if "next" in self.weeks_to_reserve:
            if next_week_available_days:
                self.reserve_for_a_week(
                    rests_and_day_numbers_of_week=rests_and_day_numbers_of_week,
                    available_days=next_week_available_days,
                    foods=foods,
                )
            else:
                self.emit(
                    SIGNAL("log(QString)"),
                    self.username + ":" + "\n" + u" رزرو برای هفته ی بعد هنوز باز نشده است بعدا امتحان کنید ",
                )
        self.take_screenshot(self.username, self.nameOfList)
        self.open_screenshot(self.username, self.nameOfList)
        self.emit(SIGNAL("reservation(QString,QString)"), u"انجام شد", u"رزرو برای " + self.username + u" انجام شد ")
        self.emit(SIGNAL("removeThread(QString)"), self.username)
        self.emit(SIGNAL("log(QString)"), u" رزرو برای " + self.username + u" انجام شد ")

    def reserve_for_a_week(self, rests_and_day_numbers_of_week, available_days, foods):
        """Reserve every configured meal of one week and log each outcome.

        Args:
            rests_and_day_numbers_of_week: Iterable of dicts with ``"day"``
                (index into the Saturday-first week) and ``"rests"`` (one
                restaurant value per meal; ``""`` means nothing chosen).
            available_days: Mapping of day number (as str) to the date value
                offered by the site.
            foods: Sequence indexed by meal number giving preferred foods.
        """
        day_mapping = [u"شنبه", u"1شنبه", u"2شنبه", u"3شنبه", u"4شنبه", u"5شنبه", u"جمعه"]
        meal_mapping = [u"صبحانه", u"ناهار", u"شام"]
        for rests_and_day_number_of_each_day in rests_and_day_numbers_of_week:
            # check_credit() returns True when credit suffices, else the balance.
            check_credit = self.check_credit()
            if check_credit is not True:
                self.emit(
                    SIGNAL("log(QString)"),
                    self.username + ":" + "\n" + u" اعتبار شما کافی نیست " + str(check_credit) + "-",
                )
                break
            meal = 0
            selected_day = rests_and_day_number_of_each_day["day"]
            if str(selected_day) in available_days:
                for rest in rests_and_day_number_of_each_day["rests"]:
                    if rest == "":
                        self.emit(
                            SIGNAL("log(QString)"),
                            self.username
                            + ":"
                            + "\n"
                            + u" روز "
                            + day_mapping[selected_day]
                            + u" "
                            + meal_mapping[meal]
                            + u" انتخاب نکرده اید ",
                        )
                        meal += 1
                        continue
                    foods_of_meal = foods[meal]
                    try:
                        reserved = self.reserve_this(available_days[str(selected_day)], rest, str(meal), foods_of_meal)
                    except Exception:
                        # Any failure while driving the form is logged as
                        # "already reserved or meal not served".
                        self.emit(
                            SIGNAL("log(QString)"),
                            self.username
                            + ":"
                            + "\n"
                            + meal_mapping[meal]
                            + u" روز "
                            + day_mapping[selected_day]
                            + u" قبلا رزرو شده یا این وعده سرو نمی شود",
                        )
                    else:
                        if reserved:
                            self.emit(
                                SIGNAL("log(QString)"),
                                self.username
                                + ":"
                                + "\n"
                                + u" روز "
                                + day_mapping[selected_day]
                                + u" "
                                + meal_mapping[meal]
                                + u" رزرو شد ",
                            )
                        else:
                            self.emit(
                                SIGNAL("log(QString)"),
                                self.username
                                + ":"
                                + "\n"
                                + meal_mapping[meal]
                                + u" روز "
                                + day_mapping[selected_day]
                                + u" هیچ یک از انتخاب های شما سرو نمی شود ",
                            )
                    meal += 1
            else:
                self.emit(
                    SIGNAL("log(QString)"),
                    self.username
                    + ":"
                    + "\n"
                    + u" همه ی وعده های روز "
                    + day_mapping[selected_day]
                    + u" قبلا رزرو شده یا سلف تعطیل است ",
                )

    def decode_database(self, name_of_database):
        """Load a food list from ``./foodlists/`` and return its three sections.

        Returns:
            tuple: ``(foods, restsOfEachDay, version)`` read from the JSON file.
        """
        # `with` closes the file; the original left the handle open.
        with open("./foodlists/" + name_of_database, "r") as json_file:
            json_decoded = json.load(json_file)
        foods = json_decoded["foods"]
        rests_and_day_numbers_of_week = json_decoded["restsOfEachDay"]
        user_list_version = json_decoded["version"]
        return (foods, rests_and_day_numbers_of_week, user_list_version)

    def login(self, username, password):
        """Start a PhantomJS browser, log in and open the reservation page."""
        self.browser = Browser("phantomjs")
        self.browser.driver.set_window_size(800, 371)
        self.wait_time = 2
        self.browser.visit("http://sups.shirazu.ac.ir/sfxweb/Gate/Login.aspx")
        # Result ignored: the call is used only to wait for the element.
        self.browser.is_element_not_present_by_id("edId", self.wait_time)
        self.browser.find_by_id("edId").fill(username)
        self.browser.find_by_id("edPass").fill(password)
        self.browser.find_by_id("edWebApp").select(1)
        self.browser.find_by_id("edEnter").click()
        self.browser.find_by_css("ul.active:nth-child(3) > li:nth-child(2)").click()

    def reserve_this(self, selected_date, selected_rest, selected_meal, foods_of_meal):
        """Try to reserve one meal.

        Selects the restaurant, date and meal, then buys the first entry of
        ``foods_of_meal`` that is among the foods currently offered.

        Returns:
            bool: True if a food was chosen and the buy button clicked.
        """
        # select restaurant
        rest = self.browser.find_by_id("edRestaurant")
        self.browser.select("edRestaurant", selected_rest)
        # select date (the presence check is used only as a wait)
        self.browser.is_element_not_present_by_id("edDate", self.wait_time)
        self.browser.select("edDate", selected_date)
        # select meal
        self.browser.is_element_not_present_by_id("edMeal", self.wait_time)
        meal = self.browser.find_by_id("edMeal")
        meal.select(selected_meal)
        # collect the values of the foods being served
        serving_foods = []
        self.browser.is_element_not_present_by_id("Food", self.wait_time)
        number_of_foods = len(self.browser.find_by_css("#Food li"))
        for nth in range(1, number_of_foods + 1):
            value = self.browser.find_by_css("#Food > li:nth-child(" + str(nth) + ") > input:nth-child(1)").value
            serving_foods.append(value)
        # pick the first preferred food that is served and reserve it
        reserved = False
        for food in foods_of_meal:
            if str(food) in serving_foods:
                self.browser.choose("Food", str(food))
                self.browser.find_by_id("btnBuyChip").click()
                reserved = True
                break
        return reserved

    def date_of_this_friday(self):
        """Return the date of the Friday ending the current Saturday-first week."""
        today = datetime.date.today()
        week_day_of_today_gre = today.weekday()
        # Shift Monday=0..Sunday=6 to Saturday=0..Friday=6.
        week_day_of_today_per = week_day_of_today_gre + 2 if week_day_of_today_gre < 5 else week_day_of_today_gre - 5
        days_to_friday = datetime.timedelta(days=6 - week_day_of_today_per)
        this_friday = today + days_to_friday
        return this_friday

    def collect_available_days(self):
        """Split the dates offered by the site into this week and next week.

        Returns:
            tuple: Two dicts mapping day number (str, Saturday = ``"0"``) to
            the site's date value, for this week and for later dates.
        """
        dates = self.collect_available_dates()
        this_friday = self.date_of_this_friday()
        next_saturday = this_friday + datetime.timedelta(days=1)
        this_week_available_days = {}
        next_week_available_days = {}
        for date in dates:
            # Date values are read as YYYYMMDD in the Persian calendar.
            year = int(date[0:4])
            month = int(date[4:6])
            day = int(date[6:8])
            gre_date = jalali.Persian(year, month, day).gregorian_datetime()
            weekday = gre_date.weekday()
            day_number = weekday + 2 if weekday < 5 else weekday - 5
            if gre_date > this_friday:
                next_week_available_days[str(day_number)] = date
            if gre_date < next_saturday:
                this_week_available_days[str(day_number)] = date
        return (this_week_available_days, next_week_available_days)

    def collect_available_dates(self):
        """Return the values of the date options offered for restaurant ``"8"``."""
        rest = self.browser.find_by_id("edRestaurant")
        self.browser.select("edRestaurant", "8")
        dates = []
        number_of_dates = len(self.browser.find_by_css("#edDate >option"))
        # Starts at the second <option>; the first one is skipped.
        for nth in range(2, number_of_dates + 1):
            value = self.browser.find_by_css("#edDate > option:nth-child(" + str(nth) + ")").value
            dates.append(value)
        return dates

    def take_screenshot(self, username, nameOfList):
        """Save a screenshot named after the user and the list (last 5 characters dropped)."""
        self.browser.driver.save_screenshot("./screenshots/" + username + nameOfList[0:-5] + ".png")

    def open_screenshot(self, username, nameOfList):
        """Open the saved screenshot with the platform's default viewer."""
        filename = os.getcwd() + "/screenshots/" + username + nameOfList[0:-5] + ".png"
        if sys.platform == "win32":
            os.startfile(filename)
        else:
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.call([opener, filename])

    def check_credit(self):
        """Return True if the displayed credit is at least -1500, else the credit as int."""
        string_credit = self.browser.find_by_id("lblCredit")
        int_credit = int(string_credit.text.replace(",", ""))
        if int_credit < -1500:
            return int_credit
        return True
# NOTE(review): fragment — `b`, `credentials`, `log`, DELAY and the MINERVA_*
# constants are defined outside this snippet, and it is truncated after
# `for t in traces:`.
try:
    # Initial login: record the current grades as the baseline.
    b.visit(MINERVA_HOME)
    b.find_by_id('mcg_un').fill(credentials[0])
    b.find_by_id('mcg_pw').fill(credentials[1])
    b.find_by_id('mcg_un_submit').click()
    b.visit(MINERVA_TRANSCRIPT)
    grades = build_grades()
    b.visit(MINERVA_LOGOUT)
    # Poll forever: every DELAY seconds log in again and compare the grades.
    while True:
        time.sleep(DELAY)
        b.visit(MINERVA_HOME)
        # Skip this round when the login field is missing from the home page.
        if b.is_element_not_present_by_id('mcg_un'):
            log.warning('Minerva home missing login, trying again in %d seconds' % DELAY)
            continue
        b.find_by_id('mcg_un').fill(credentials[0])
        b.find_by_id('mcg_pw').fill(credentials[1])
        b.find_by_id('mcg_un_submit').click()
        b.visit(MINERVA_TRANSCRIPT)
        compare_grades(grades)
        log.info('Update complete')
        b.visit(MINERVA_LOGOUT)
except Exception:
    # Format the traceback of whatever ended the loop.
    ex_type, ex, tb = sys.exc_info()
    traces = traceback.format_list(traceback.extract_tb(tb))
    message = ''
    for t in traces:
# NOTE(review): fragment of a splinter API cheat sheet; `alert` and `browser`
# are created before this snippet begins.
alert.accept()
alert.dismiss()
prompt = browser.get_alert()
prompt.text
prompt.fill_with('text')
prompt.accept()
prompt.dismiss()
# Use the with statement to interact with both alerts and prompts
with browser.get_alert() as alert:
    alert.do_stuff()
# Executing javascript
browser.execute_script("$('body').empty()")
browser.evaluate_script("4+4") == 8
# Matchers
browser = Browser()
browser.visit('https://splinter.readthedocs.io/')
browser.is_text_present('splinter')  # True
browser.is_text_present('splinter', wait_time=10)  # True, using wait_time
# NOTE(review): splinter documents this matcher as `is_text_not_present`;
# `is_not_present` looks like a typo — confirm against the installed version.
browser.is_not_present('text not present')  # True
browser.is_element_present_by_css('h1')
browser.is_element_present_by_xpath('//h1')
browser.is_element_present_by_tag('h1')
browser.is_element_present_by_name('name')
browser.is_element_present_by_text('Hello World!')
browser.is_element_not_present_by_id('firstheader')
browser.is_element_not_present_by_value('query')
browser.is_element_present_by_value('query', wait_time=10)
# Group the (key, value) pairs of `st` into rows of `head_len` entries, each
# rendered as "key:value", then dump the rows to a GB2312-encoded CSV file.
st_len = len(st)
for i in range(0, st_len, head_len):
    today_up_stock.append(
        ['{}:{}'.format(k, v) for k, v in st[i:i + head_len]])
print(today_up_stock)
h = pd.DataFrame(today_up_stock)
h.to_csv('today_up_info.csv', index=False, header=False, encoding='gb2312')

# Open every link in `bl` by its text, wait for the article body in the
# newest window, append title and body to `f`, then close that window.
for i in range(len(bl)):
    link_text = bl[i].text
    b.click_link_by_text(link_text)
    time.sleep(1)
    newest_window = b.driver.window_handles[-1]
    b.driver.switch_to.window(newest_window)
    while b.is_element_not_present_by_id('htmlContent'):
        print("wait")
        time.sleep(1)
    t = b.find_by_tag('h1')
    c = b.find_by_xpath('//div[@id="htmlContent"]')
    for piece in (t.text, c.value):
        f.write(piece.encode('utf-8'))
    b.windows.current.close()
# NOTE(review): fragment — `browser`, `survey_code` and `randint` come from
# outside this snippet; nesting is reconstructed from a collapsed source line.
# Enter the survey code: characters 0-24 in five 5-character fields and
# character 25 in the sixth field.
browser.fill('CN1', survey_code[:5])
browser.fill('CN2', survey_code[5:10])
browser.fill('CN3', survey_code[10:15])
browser.fill('CN4', survey_code[15:20])
browser.fill('CN5', survey_code[20:25])
browser.fill('CN6', survey_code[25])
browser.find_by_name('NextButton').click()
# Answer page after page by clicking randomly chosen radio buttons.
while True:
    spans = browser.find_by_tag("span")
    button = []
    count = 0
    for i in spans:
        # if i.has_class('radioButtonHolder'):
        # button.append(i)
        if i.has_class('radioBranded'):
            button.append(i)
            count += 1
        # Every 5 collected radio spans are treated as one question:
        # click one of them at random and start collecting again.
        if count == 5:
            button[randint(0, len(button) - 1)].click()
            count = 0
            button = []
    try:
        # Leftover group of 1-4 radios: click one of them as well.
        if len(button) < 5 and len(button) > 0:
            button[randint(0, len(button) - 1)].click()
        browser.find_by_id('NextButton').click()
    except:
        # Any error above (bare except) ends the loop only when no element
        # with id 'finishForm' is present; otherwise the loop runs again.
        if browser.is_element_not_present_by_id('finishForm'):
            break
# browser.fill('S081000','Aye that\'s pretty good. The drive thru wait wasn\'t too long, and person who took my order spoke clearly and got my order correctly. When I got my food it was warm and the order was correct.')
def scrape():
    """Scrape Mars news, the JPL featured image, a weather tweet and a facts table.

    Returns:
        dict: with the keys
            ``featured_news`` -> ``{"Title": str, "Paragraph": str}``,
            ``image_of_the_day`` -> ``{"URL": str}``,
            ``mars_weather`` -> ``{"data": str}``,
            ``mars_facts_data`` -> ``{"table": str}`` (HTML, newlines removed).
    """
    from urllib.parse import urljoin

    scraped_data = {}

    # chromedriver.exe is expected in the working directory.
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)
    try:
        # --- Latest news -------------------------------------------------
        url = "https://mars.nasa.gov/news/"
        browser.visit(url)
        # Wait for the article blocks to load. The original called
        # is_element_not_present_by_id() with these *class* names, which
        # returns immediately and therefore never waited.
        browser.is_element_present_by_css("div.content_title", wait_time=30)
        browser.is_element_present_by_css("div.article_teaser_body", wait_time=30)
        soup = bs(browser.html, "html.parser")
        news_title = soup.find("div", class_="content_title").text
        news_paragraph = soup.find("div", class_="article_teaser_body").text
        scraped_data['featured_news'] = {
            "Title": news_title,
            "Paragraph": news_paragraph
        }

        # --- JPL featured image ------------------------------------------
        url_image = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
        browser.visit(url_image)
        # XPath of the first image tile; clicking it opens the full-size view.
        xpath = "//*[@id=\"page\"]/section[3]/div/ul/li[1]/a/div/div[2]/img"
        results = browser.find_by_xpath(xpath)
        img = results[0]
        img.click()
        # Wait until the enlarged image (class "fancybox-image") is in the page.
        browser.is_element_present_by_css("img.fancybox-image", wait_time=30)
        soup = bs(browser.html, "html.parser")
        img_url = soup.find("img", class_="fancybox-image")["src"]
        # urljoin resolves the src against the page URL; the original glued
        # "scheme://host/" onto the path, which gives "//" for a src that
        # starts with a slash.
        full_img_url = urljoin(url_image, img_url)
        scraped_data['image_of_the_day'] = {"URL": full_img_url}

        # --- Mars weather tweet --------------------------------------------
        url_weather = "https://twitter.com/marswxreport?lang=en"
        browser.visit(url_weather)
        soup = bs(browser.html, "html.parser")
        mars_weather = soup.find(
            "p",
            class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"
        ).text
        scraped_data["mars_weather"] = {"data": mars_weather}

        # --- Mars facts table ----------------------------------------------
        url_facts = "https://space-facts.com/mars/"
        browser.visit(url_facts)
        table = pd.read_html(url_facts)
        df_mars_facts = table[0]
        df_mars_facts.columns = ["Parameter", "Values"]
        # set_index returns a new frame; the original discarded the result,
        # so the index was never applied.
        df_mars_facts = df_mars_facts.set_index("Parameter")
        mars_html_table = df_mars_facts.to_html()
        mars_html_table = mars_html_table.replace("\n", "")
        scraped_data['mars_facts_data'] = {"table": mars_html_table}
    finally:
        # Close Chrome even if one of the pages changed and a lookup failed.
        browser.quit()

    return scraped_data
class Session:
    """Logged-in browser session on the Student Agency ticket site.

    Wraps a splinter ``Browser`` and drives the booking flow: search for
    connections, pick a connection, pick a seat, and reserve it.
    """

    def __init__(self, browser, user):
        """Open the site in a new browser and submit the login form.

        Args:
            browser: driver name passed straight to ``Browser``.
            user: mapping; ``login`` and ``password`` are read here, and
                ``first``/``last``/``email``/``phone`` later by
                ``order_seat``.
        """
        self.browser = Browser(browser)
        self.browser.visit('http://jizdenky.studentagency.cz/')
        self.browser.fill_form({
            'passwordAccountCode': user['login'],
            'password': user['password'],
        })
        # NOTE(review): presumably scrolls the login button into view
        # before clicking it -- confirm.
        self.browser.execute_script('window.scrollTo(0, 100)')
        self.browser.find_by_value('Přihlásit').first.click()
        self.user = user
        self.log = logging.getLogger(__name__)

    def go_search(self):
        """Navigate back to the site's start page."""
        self.browser.visit('http://jizdenky.studentagency.cz/')

    def search(self, task, date_return=None, is_open=False):
        """Fill the search form and collect connections for ``task.date``.

        Args:
            task: object providing ``from_city``, ``to_city`` and ``date``.
            date_return: optional return date typed into the return field.
            is_open: when true, tick the ``returnTicketOpen`` checkbox.

        Returns:
            A list of ``Connection`` objects built from the ``routeSummary``
            rows listed under the heading whose date equals ``task.date``.
        """
        # The second radio button is chosen for return/open tickets,
        # the first one otherwise.
        radios = self.browser.find_by_id('hp_form_itinerar').first \
            .find_by_xpath('div/input[@type="radio"]')
        radios[1 if date_return or is_open else 0].check()

        for city, tab_index in [(task.from_city, 1), (task.to_city, 2)]:
            self.browser.find_by_css(
                'input[tabindex="{}"]'.format(tab_index)).first.fill(city)
            # Pick the autocomplete suggestion matching the typed city.
            for suggestion in self.browser.find_by_css('.ui-menu-item'):
                link = suggestion.find_by_tag('a')
                if link.value.lower() == city.lower():
                    link.click()
                    break

        self.browser.fill('departure:dateField', task.date)
        if date_return:
            self.browser.fill('returnDeparture:dateField', date_return)
        if is_open:
            self.browser.check('returnTicketOpen')
        self.browser.find_option_by_text('ISIC').first.check()
        self.browser.find_by_value('Vyhledat').first.click()

        # Each probe blocks for up to one second, so this is not a hot spin.
        while self.browser.is_element_not_present_by_css('.left_column',
                                                         wait_time=1):
            pass

        rows = self.browser.find_by_css('.left_column') \
            .find_by_xpath('div/div/*')
        connections = []
        # Initialised so that a route row appearing before any date heading
        # trips the assertion below instead of raising UnboundLocalError.
        date_local = None
        for row in rows:
            if row.tag_name == 'h2':
                # The date is the second space-separated token of the heading.
                date_local = row.text.split(' ')[1]
            elif row.tag_name == 'div' and row.has_class('routeSummary'):
                assert date_local, 'route row found before any date heading'
                if date_local != task.date:
                    # Rows are processed in page order; stop at the first
                    # route that belongs to a different date.
                    break
                connections.append(Connection(row))
        return connections

    def order_time(self, connection):
        """Select ``connection`` and return the seats that can be chosen.

        Keeps clicking the connection (dismissing any ``_wicket_window``
        dialog) until the ``sumary_lines`` element is present, then clicks
        its first button.

        Returns:
            dict mapping seat number (int) to the seat element.
        """
        while True:
            if connection.click():
                dialog = self.browser.find_by_css('[id^=_wicket_window]')
                if dialog:
                    dialog.first.find_by_tag('button').click()
            # NOTE(review): the original layout was lost; this check is
            # assumed to run on every iteration -- confirm.
            if self.browser.is_element_present_by_id('sumary_lines',
                                                     wait_time=1):
                break

        self.browser.find_by_id('sumary_lines') \
            .first.find_by_tag('button') \
            .first.click()

        seats = {}
        bus = self.browser.find_by_css('.seatsContainer')
        if bus:
            free_seats = bus.first.find_by_css(
                '.seatContainer:not([style*=blocked])')
            for seat in free_seats:
                # The label's last character is dropped before int();
                # presumably trailing punctuation -- confirm.
                label = seat.find_by_tag('div').first.html
                seats[int(label[:-1])] = seat
        else:
            # Alternative seat-map markup.
            bus = self.browser.find_by_css('.vehicle')
            for seat in bus.first.find_by_css('.free, .selected'):
                seats[int(seat.text[:-1])] = seat
        return seats

    def order_seat(self, seat):
        """Reserve ``seat`` and, if configured, e-mail the ticket page.

        Selects the seat, clicks the insurance option whose label contains
        'nechci', fills in passenger details when the submit button does not
        yet read 'Rezervovat', ticks the terms agreement and submits.  If
        ``conf.yaml`` has an ``email`` section the ticket page HTML is then
        mailed to ``self.user['email']``.
        """
        if not seat.has_class('selected'):
            seat.click()

        for fieldset in self.browser.find_by_css('fieldset.topRoute'):
            legend = fieldset.find_by_css('legend')
            if legend and 'Pojištění' in legend[0].text:
                for package in fieldset.find_by_css('.insurancePackageType'):
                    if 'nechci' in package.find_by_tag('label').text:
                        package.find_by_tag('input').click()
                        time.sleep(1)

        submit = self.browser.find_by_css('[name^=buttonContainer]').first
        if 'Rezervovat' not in submit.text:
            # Not on the reservation step yet: advance and fill in the
            # passenger form first.
            submit.click()
            time.sleep(1)
            passenger = (self.user['first'], self.user['last'],
                         self.user['email'], self.user['phone'])
            fields = self.browser.find_by_id('passengerInfo') \
                .first.find_by_tag('input')
            for field, value in zip(fields, passenger):
                field.fill(value)
            submit = self.browser.find_by_css('[name^=buttonContainer]').first
            assert 'Rezervovat' in submit.text

        agreement = self.browser.find_by_css(
            '[name="bottomComponent:termsAgreementCont:termsAgreementCB"]')
        if agreement:
            agreement[0].check()
        time.sleep(1)
        submit.click()

        with open('conf.yaml') as f:
            # safe_load: a bare yaml.load() can construct arbitrary objects
            # and is rejected without a Loader by recent PyYAML.  An empty
            # file parses to None, hence the fallback.
            conf = yaml.safe_load(f) or {}
        if 'email' not in conf:
            return

        email = conf['email']
        while self.browser.is_element_not_present_by_id('ticketPage',
                                                        wait_time=1):
            pass
        msg = MIMEText(
            self.browser.find_by_id('ticketPage').first.html, 'html')
        msg['Subject'] = 'SA reservation'
        msg['From'] = email['from']
        msg['To'] = self.user['email']
        username = email['username']
        password = email['password']
        # The context manager sends QUIT and closes the socket even when
        # starttls/login/sendmail raises.
        with smtplib.SMTP(email['server']) as server:
            server.starttls()
            # The configured password is base64-encoded.
            server.login(username, b64decode(password).decode())
            server.sendmail(msg['From'], msg['To'], msg.as_string())
# crawler of the average scores of all classes available in automn 2016 __author__ = 'Zheng Wu' from splinter import Browser import time import re browser = Browser('chrome') url = 'http://electsys.sjtu.edu.cn/edu/login.aspx' browser.visit(url) time.sleep(15) browser.visit( 'http://electsys.sjtu.edu.cn/edu/student/elect/warning.aspx?xklc=3&lb=1') button = browser.find_by_id('CheckBox1') if (browser.is_element_not_present_by_id('CheckBox1')): pass else: button.click() browser.find_by_id('btnContinue').click() # get the code of the classes classlist = [] pattern = re.compile(r'[A-Z]{2}[0-9]{3}') browser.find_by_id('SpeltyRequiredCourse1_btnXuanXk').click() deparlist = [ '02000', '02500', '03000', '03300', '03600', '03700', '04000', '05000', '07000', '07100', '07200', '08000', '08200', '09000', '11000', '12000', '13000', '14000', '14200', '15000', '16000', '17000', '18000', '19000', '20000', '21000', '22000', '23000', '24000', '25100', '26000', '29100', '30000', '33000', '35000', '35100', '36000', '37000', '38000', '39000',
#查找小学期海选时所有的课程的平均分及最高分 __author__='Zheng Wu' from time import sleep from splinter import Browser import re browser=Browser('chrome') url='http://electsys.sjtu.edu.cn/edu/login.aspx' browser.visit(url) print u'你现在有20s的时间输入jaccount' sleep(15) print u'请稍等,本程序稍微有点慢.在这段等待的时间,你可以先打开你的微信^_^' browser.visit('http://electsys.sjtu.edu.cn/edu/student/elect/warning.aspx?xklc=1&lb=3') button=browser.find_by_id('CheckBox1') if (browser.is_element_not_present_by_id('CheckBox1')): pass else: button.click() browser.find_by_id('btnContinue').click() #获取所有课程编码 pattern=re.compile(r'[A-Z]{2}[0-9]{3}') classlist=[] for ele in browser.find_by_tag('td'): if (re.match(pattern,ele.text)): classlist.append(ele.text) #打开微信 wechaturl='http://wechat.shwilling.com/auth/qrcode/login?redirect=http%3A%2F%2Fwechat.shwilling.com%2Fsjtu%2Fcourse' browser.visit(wechaturl)
class NIPExtractor():
    """Look up NIP numbers on ppuslugi.mf.gov.pl through a Chrome browser.

    For every checked NIP the first line of the result text is stored in
    ``scraped_data`` as a ``(nip, comment)`` tuple, a screenshot is saved,
    and the collected tuples are written to ``nipMF.csv``.
    """

    # Destination of each saved screenshot; '{}' receives the NIP.
    # Override on a subclass or instance to store screenshots elsewhere.
    SCREENSHOT_PATH_TEMPLATE = (
        '/home/izabela/workspace/askMF/printscreens/{}.png')

    def __init__(self):
        """Start Chrome, open the portal and click through to the NIP finder."""
        self._browser = Browser('chrome')
        # self._browser = Browser('chrome', headless=True)
        self.scraped_data = []
        url = 'https://ppuslugi.mf.gov.pl/'
        self._browser.driver.set_window_size(940, 580)
        self._browser.visit(url)
        # Sidebar link to the NIP finder, reachable without logging in.
        findNIP_xpath = '//*[@id="SidebarActions_WebUMn"]/li[3]/a'
        findNIP = self._browser.find_by_xpath(findNIP_xpath)[0]
        findNIP.click()

    def _wait_for(self, is_absent, locator):
        """Poll every 0.5 s until ``is_absent(locator)`` stops being true."""
        while is_absent(locator):
            time.sleep(0.5)

    def _fill_search_bar(self, a):
        """Wait for the search bar and type ``a`` into it."""
        search_bar_path = "//input[@id='b-7' and @name='b-7']"
        self._wait_for(self._browser.is_element_not_present_by_xpath,
                       search_bar_path)
        self._browser.find_by_xpath(search_bar_path).fill(a)

    def _click_by_id(self, button_id):
        """Wait for the element with ``button_id`` and click it."""
        self._wait_for(self._browser.is_element_not_present_by_id, button_id)
        self._browser.find_by_id(button_id)[0].click()

    def _scrape(self):
        """Wait for the result caption and return its text."""
        search_results_xpath = '//*[@id="caption2_b-3"]'
        self._wait_for(self._browser.is_element_not_present_by_xpath,
                       search_results_xpath)
        return self._browser.find_by_xpath(search_results_xpath).first.text

    def _quit(self):
        """Close the browser."""
        self._browser.quit()

    def _screenshot_saver(self, nip):
        """Take a screenshot and copy it to the path built from ``nip``."""
        imageMF = self._browser.screenshot(name=nip, suffix='.png')
        shutil.copyfile(imageMF, self.SCREENSHOT_PATH_TEMPLATE.format(nip))

    def _esc(self, table):
        """Click the 'b-9' button to leave the result page.

        Nothing is clicked when ``table`` is empty.
        """
        if table:
            self._click_by_id('b-9')

    def _check(self, nip):
        """Search for one NIP and record the first line of the result.

        Returns:
            ``self.scraped_data`` (the same list object), with a new
            ``(nip, comment)`` tuple appended.
        """
        nip = str(nip)
        self._fill_search_bar(nip)
        self._click_by_id('b-8')
        # A single scrape is enough: _scrape already waits for the result
        # element before reading it.
        tab = self._scrape().splitlines()
        if tab:
            comment = tab[0]
        else:
            comment = "nie sprawdzono, prawdopodobnie bledny NIP"
        print(comment)
        self._screenshot_saver(nip)
        self.scraped_data.append((nip, comment))
        self._esc(tab)
        return self.scraped_data

    def _save_to_csv(self, data_to_csv):
        """Write ``(nip, comment)`` rows to ``nipMF.csv``."""
        df = pd.DataFrame(data=data_to_csv, columns=['nip', 'comment'])
        df.to_csv("nipMF.csv")

    def check_nip(self, nip):
        """Check a single NIP, save the CSV, close the browser.

        The browser is closed even if the check raises.
        """
        try:
            result = self._check(nip)
            self._save_to_csv(self.scraped_data)
        finally:
            self._quit()
        return result

    def check_list_of_nips(self, nips):
        """Check every NIP in ``nips``, save the CSV, close the browser.

        An empty ``nips`` yields the (unchanged) ``scraped_data`` instead of
        failing, and the browser is closed even if a check raises.
        """
        # _check returns self.scraped_data, so this is also the right value
        # when the loop body never runs.
        result = self.scraped_data
        try:
            for nip in nips:
                result = self._check(str(nip))
            self._save_to_csv(result)
        finally:
            self._quit()
        return result