class UserUtils(object):

    def __init__(self):
        self.config = config.read_config()
        self.account = self.config['account']
        self.idp_server = self.config['nodes']['idp_node']

        # Abort test if esgf-web-fe is not reachable
        r = requests.get("https://{0}/esgf-web-fe".format(self.idp_server), verify=False, timeout=1)
        assert r.status_code == 200

        self.browser = Browser('firefox')

        # Mapping user data to fit the web-fe user creation form
        self.elements = {'firstName': self.account['firstname'],
                         'lastName': self.account['lastname'],
                         'email': self.account['email'],
                         'userName': self.account['username'],
                         'password1': self.account['password'],
                         'password2': self.account['password']}

    def check_user_exists(self):
        URL = "https://{0}/esgf-web-fe/login".format(self.idp_server)
        OpenID = "https://{0}/esgf-idp/openid/{1}".format(self.idp_server, self.account['username'])

        # Try to log in
        self.browser.visit(URL)
        self.browser.find_by_id('openid_identifier').fill(OpenID)
        self.browser.find_by_value('Login').click()

        # User does not exist if unable to resolve OpenID
        if self.browser.is_text_present("Error: unable to resolve OpenID identifier"):
            self.user_exists = False
        else:
            self.user_exists = True

    def create_user(self):
        URL = "https://{0}/esgf-web-fe/createAccount".format(self.idp_server)
        self.browser.visit(URL)

        # Filling the form
        for element_name in self.elements:
            self.browser.find_by_name(element_name).fill(self.elements[element_name])
        self.browser.find_by_value('Submit').click()

        # Parsing the response
        self.response = []
        if self.browser.is_text_present("SUCCESS"):
            self.response.append("SUCCESS")
        else:
            self.response.append("FAILURE")
            selection = self.browser.find_by_tag('span')
            for sel in selection:
                if sel.has_class('myerror'):
                    self.response.append(sel.value)

    def exit_browser(self):
        self.browser.quit()
def checkin(confirmation, first, last, email, run_time):
    """Check in to a flight.

    Arguments:
    confirmation -- your confirmation number
    first -- your first name
    last -- your last name
    email -- email address to send confirmation
    run_time -- the time you need to check in
    """
    browser = Browser('zope.testbrowser', ignore_robots=True)
    browser.visit('https://www.southwest.com/flight/retrieveCheckinDoc.html')
    browser.fill('confirmationNumber', confirmation)
    browser.fill('firstName', first)
    browser.fill('lastName', last)

    # Sleep until the requested check-in time
    delta = run_time - datetime.now()
    time.sleep(delta.total_seconds())

    submit = browser.find_by_name('submitButton')
    submit.click()
    if browser.find_by_id('errors_props_wrapper') != []:
        browser.quit()
        raise RuntimeError("Info is incorrect or you checked in too early")

    check_in = browser.find_by_name('printDocuments')
    check_in.click()
    # Use a distinct name for the email option element so the `email`
    # argument is not shadowed when filling in the address below.
    email_option = browser.find_by_id('optionEmail')
    email_option.click()
    browser.fill('emailAddress', email)
    cont = browser.find_by_name('book_now')
    cont.click()
    browser.quit()
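# Hedged usage sketch (not from the original source): the confirmation number,
# names, and email below are placeholders, and the function is assumed to be
# importable alongside `datetime`/`timedelta`.
if __name__ == '__main__':
    from datetime import datetime, timedelta
    checkin('ABC123', 'Jane', 'Doe', 'jane@example.com',
            run_time=datetime.now() + timedelta(seconds=5))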
def court_booking_login(user, passwd):
    """
    Create a new browser instance and login to the website
    """
    browser = Browser()
    browser.visit("https://courtbooking.bayclubs.com")
    if browser.status_code != 200:
        logging.error("court_booking_login: Unable to open court booking "
                      "website")
        browser.quit()
        return None

    input_email = browser.find_by_id("InputEmail1")
    input_email.fill(user)
    input_passwd = browser.find_by_id("InputPassword1")
    input_passwd.fill(passwd)
    login_button = browser.find_by_id("loginButton")
    login_button.click()

    if browser.status_code != 200:
        logging.error("court_booking_login: Error unable to login into court "
                      "booking website")
        browser.quit()
        return None

    if browser.is_element_present_by_id("loginresult", wait_time=5):
        logging.error("court_booking_login: Incorrect login credentials")
        browser.quit()
        return None

    return browser
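# Hedged usage sketch (not part of the original source): the credentials are
# placeholders, and the caller is responsible for quitting the returned browser.
if __name__ == '__main__':
    session = court_booking_login('user@example.com', 'secret')
    if session is not None:
        print(session.title)
        session.quit()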
def enable():
    import time
    import requests
    import settings
    from splinter import Browser
    from xvfbwrapper import Xvfb

    print "Trying to enable myself."
    vdisplay = Xvfb()
    vdisplay.start()

    email = settings.getEmail()
    password = settings.getPassword()
    team_name = settings.getTeamName()
    bot_user = settings.getBotUser()

    browser = Browser('chrome')
    url = 'https://{}.slack.com/services/{}'.format(team_name, bot_user)
    browser.visit(url)
    browser.fill('email', email)
    browser.fill('password', password)
    browser.find_by_id('signin_btn').first.click()
    browser.find_link_by_text('Enable').first.click()
    time.sleep(2)  # Sometimes I saw a crash where there was no alert, so we'll wait a bit first.
    alert = browser.get_alert()
    alert.accept()
    time.sleep(2)  # If you close the display too quickly, the request doesn't get processed.
    vdisplay.stop()
def google_login(user_name, password, code):
    browser = Browser('firefox')
    url = 'https://accounts.google.com/ServiceLogin'
    browser.visit(url)
    browser.find_by_id('Email').fill(user_name)
    browser.find_by_id('next').click()
    browser.find_by_id('Passwd').fill(password)
    browser.find_by_id('signIn').click()

    url1 = 'https://play.google.com/store?hl=jp'
    browser.visit(url1)
    browser.find_by_css('button.id-no-menu-change').click()
    time.sleep(1)
    browser.find_by_css('input.redeem-input-text-box').fill(code)
    browser.find_by_id('id-redeem-ok-button').click()
    time.sleep(2)
    result = browser.find_by_css('div.redeem-invalid-code-msg').value
    browser.quit()
    return result
class LoginTestCase(unittest.TestCase):

    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        self.testbed.init_datastore_v3_stub()
        self.testbed.init_memcache_stub()
        self.browser = Browser('chrome')

    def tearDown(self):
        self.testbed.deactivate()

    def test_login(self):
        self.browser.visit("http://127.0.0.1:8080/")
        self.assertEqual(self.browser.find_by_tag("h3").first.text, "Not logged in")
        self.browser.find_by_id("submit-login").first.click()
        self.assertEqual(self.browser.find_link_by_text("Insurance").first.text, "Insurance")

    def test_logout(self):
        self.browser.visit("http://127.0.0.1:8080/")
        self.assertEqual(self.browser.find_by_tag("h3").first.text, "Not logged in")
        self.browser.find_by_id("submit-login").first.click()
        self.assertEqual(self.browser.find_link_by_text("Insurance").first.text, "Insurance")
        self.browser.find_link_by_text("Log out").first.click()
        self.assertEqual(self.browser.find_by_tag("h3").first.text, "Not logged in")
class UserUtils(object):

    def __init__(self):
        self.config = config.read_config()
        self.account = self.config['account']
        self.idp_server = self.config['nodes']['idp_node']

        # Abort test if esgf-web-fe is not reachable
        r = requests.get("https://{0}/user/add".format(self.idp_server), verify=False, timeout=1)
        assert r.status_code == 200

        self.browser = Browser('firefox')

        # Mapping user data to fit the web-fe user creation form
        self.elements = {'first_name': self.account['firstname'],
                         'last_name': self.account['lastname'],
                         'email': self.account['email'],
                         'username': self.account['username'],
                         'password': self.account['password'],
                         'confirm_password': self.account['password'],
                         'institution': self.account['institution'],
                         'city': self.account['city'],
                         'country': self.account['country']}

    def check_user_exists(self):
        URL = "https://{0}/login".format(self.idp_server)
        OpenID = "https://{0}/esgf-idp/openid/{1}".format(self.idp_server, self.account['username'])

        # Try to log in
        self.browser.visit(URL)
        self.browser.find_by_id('openid_identifier').fill(OpenID)
        self.browser.find_by_value('Login').click()

        # User does not exist if unable to resolve OpenID
        if self.browser.is_text_present("OpenID Discovery Error: unrecognized by the Identity Provider."):
            self.user_exists = False
        else:
            self.user_exists = True

    def create_user(self):
        URL = "https://{0}/user/add".format(self.idp_server)
        self.browser.visit(URL)

        # Filling the form
        for element_name in self.elements:
            self.browser.find_by_name(element_name).fill(self.elements[element_name])
        self.browser.find_by_value('Submit').click()

        # Parsing the response
        self.response = []
        if self.browser.is_text_present("Thank you for creating an account. You can now login."):
            self.response.append("SUCCESS")
        else:
            self.response.append("FAILURE")

    def exit_browser(self):
        self.browser.quit()
def browse(url):
    browser = Browser("phantomjs", service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    browser.visit(url)
    browser.find_by_id('RadioW03').first.click()
    browser.find_by_id('RadioS03').first.click()
    browser.find_by_text('\n Search\n ').first.click()
    return browser
def __scrape(self, landing_page):
    browser = Browser('chrome',
                      executable_path='C:\Python27\Lib\site-packages\chromedriver_win32\chromedriver.exe',
                      service_args=PROXIES)
    # browser = Browser('phantomjs', service_args=PROXIES, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0')
    with browser:
        template1 = True
        browser.visit(landing_page)
        time.sleep(2)
        nav = [x for x in browser.find_by_css('a.nav')
               if (x.text == 'Jobs by Location' or x.text == 'By Location')]
        if len(nav) > 0:
            nav[0].click()
        else:
            template1 = False
        link = browser.url
        state_index = 1
        city_index = 1
        while True:
            browser.visit(link)
            if not template1:
                nav = browser.find_by_css('#tabHeader')
                nav = nav.find_by_css('a')
                nav[1].click()
            states = browser.find_by_name('search.stateList.value')
            state_list = states.find_by_tag('option')
            print state_list[state_index].text
            state_list[state_index].click()
            if state_list[state_index].text != 'choose one...':
                element = 'cityList_' + state_list[state_index].text
                cities = browser.find_by_id(element)
                city_list = cities.find_by_tag('option')
                city_list[city_index].click()
                if city_list[city_index].text != 'choose one...':
                    print city_list[city_index].text, state_list[state_index].text
                    browser.find_by_id('cityStateSearch').click()
                    # Default to an empty list so len() below is safe even if the lookup fails
                    links = []
                    try:
                        links = browser.find_by_css('a.withBubble')
                    except:
                        pass
                    if len(links) > 0:
                        for i in links:
                            b = Browser('chrome',
                                        executable_path='C:\Python27\Lib\site-packages\chromedriver_win32\chromedriver.exe',
                                        service_args=PROXIES)
                            # b = Browser('phantomjs', service_args=PROXIES, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0')
                            with b:
                                b.visit(i['href'])
                                self.__navigate_pages(b)
                    else:
                        self.__navigate_pages(browser)
                city_index += 1
                if city_index == len(city_list):
                    city_index = 0
                    state_index += 1
                    if state_index == len(state_list):
                        break
            else:
                state_index += 1
def get_claim_code_from_server():
    browser = Browser('phantomjs', service_args=['--ignore-ssl-errors=true'])
    browser.visit(ROOT_ADDRESS + "/merchant-login")
    browser.fill_form({"email": USER_NAME, "password": PASSWORD})
    browser.find_by_id("loginButton")[0].click()
    time.sleep(5)
    browser.visit(ROOT_ADDRESS + "/api-tokens")
    browser.find_by_css(".token-access-new-button").find_by_css(".btn").find_by_css(".icon-plus")[0].click()
    browser.find_by_id("token-new-form").find_by_css(".btn")[0].click()
    return browser.find_by_css(".token-claimcode")[0].html
class TrackListScraper(object):

    def __init__(self, artists, year):
        self.browser = Browser('chrome')
        self.artists = artists
        self.year = year
        self.browser.visit('http://1001tracklists.com')

    def execute_full_scrape(self):
        artist_tracklists = {}
        for artist in self.artists:
            artist_tracklists[artist] = self.scrape_per_artist(artist)
        self.browser.quit()
        return artist_tracklists

    def scrape_per_artist(self, artist):
        """Execute the same scrape but instead using the python splinter library"""
        self.browser.fill('main_search', artist + ' edc ' + self.year)
        self.browser.find_by_id('btn_search').first.click()
        try:
            self.browser.click_link_by_partial_text('2014-06-')
            track_strings = self.get_track_list_for_set(artist)
            return track_strings
        except ElementDoesNotExist:
            pass

    def get_track_list_for_set(self, artist):
        soup = BeautifulSoup(self.browser.html)
        track_values = soup.find_all('div', class_='trackValue')
        track_strings = []
        file = open('tracklist-' + artist + '-edc' + self.year, 'w')
        for track in track_values:
            if track.a:
                track_string = track.a.string
                file.write(track_string)
                # track details in format [artist, trackname]
                track_details = self.parse_track_string(track_string)
                track_strings.append(track_details)
        file.close()
        return track_strings

    def parse_track_string(self, track_string):
        track_info = track_string.strip().split('-')
        for i in range(len(track_info)):
            track_info[i] = track_info[i].strip()
        return track_info
def test_0_http_browser_download(self):
    path = self.get_endpoint_path('HTTPServer')
    url = "http://{0}/thredds/fileServer/{1}".format(self.data_node, path)
    OpenID = "https://{0}/esgf-idp/openid/{1}".format(self.idp_node, self.username)
    pf = {'browser.helperApps.neverAsk.saveToDisk': 'application/x-netcdf, application/netcdf'}
    browser = Browser('firefox', profile_preferences=pf)
    browser.visit(url)
    if browser.status_code.is_success() is True:
        browser.quit()
        return
    browser.find_by_id('openid_identifier').fill(OpenID)
    browser.find_by_value('GO').click()
    browser.find_by_id('password').fill(self.password)
    browser.find_by_value('SUBMIT').click()
    # To do only if user is not enrolled in a group
    if browser.is_text_present('Group Registration Request'):
        # Choosing first registration group
        browser.find_by_id('button_1').click()
        # Accepting license agreement
        browser.execute_script('myForm.submit();')
        # Clicking on 'Download data' button
        browser.find_by_id('goButton').click()
    browser.quit()
def twitter_login(username, password):
    """Log in to Twitter and return the browser object"""
    browser = Browser()

    # Login
    browser.visit("https://www.twitter.com/")
    browser.find_by_id("signin-email").first.value = username
    browser.find_by_id("signin-password").first.value = password
    browser.find_by_css(".js-submit").first.click()

    return browser
def passwd(self):
    if len(self.login) < 1 or len(self.panic) < 1 or len(self.user) < 1:
        return False
    b = Browser()
    b.driver.set_window_size(900, 900)
    try:
        b.visit("https://accounts.google.com/ServiceLogin?service=accountsettings")
        b.fill('Email', self.user)
        btn = b.find_by_id("next")
        btn.click()
        b.fill('Passwd', self.login)
        btn = poll_for("#signIn", b)
        btn.click()
        b.visit("https://myaccount.google.com/security#signin")
        btn = b.find_by_css(".vkq40d").first
        if btn is not None:
            print "not none"
            btn.click()
            poll_fill('Email', self.user, b)
            btn = b.find_by_id("next")
            btn.click()
            poll_fill('Passwd', self.login, b)
            btn = b.find_by_id("signIn")
            btn.click()
            time.sleep(INV)
        btn = poll_for(".TCRTM", b)
        btn.click()
        poll_fill('Passwd', self.login, b)
        btn = b.find_by_id("signIn")
        btn.click()
        p = poll_for(".Hj", b)
        p.fill(self.panic)
        p = b.find_by_css(".Hj")[1]
        p.fill(self.panic)
        btn = b.find_by_css(".Ya")
        btn.click()
        time.sleep(INV * 5)
        b.quit()
    except:
        traceback.print_exc(file=sys.stdout)
        raw_input("Something went wrong...")
        b.quit()
        if self.attempts < self.max_attempts:
            self.attempts += 1
            self.passwd()
def check_in(self, conf_number, first_name, last_name):
    browser = Browser('phantomjs')
    browser.visit('https://www.southwest.com/')
    checkin_form_button = browser.find_by_id('booking-form--check-in-tab')[0]
    checkin_form_button.click()
    browser.fill('confirmationNumber', conf_number)
    browser.fill('firstName', first_name)
    browser.fill('lastName', last_name)
    checkin_button = browser.find_by_id('jb-button-check-in')[0]
    checkin_button.click()
    submit_button = browser.find_by_id('submitButton')[0]
    submit_button.click()
def passwd(self):
    b = Browser()
    b.driver.set_window_size(900, 900)
    b.visit("https://twitter.com")
    btn = b.find_by_css(".js-login")
    btn.click()
    b.find_by_id("signin-email").fill(self.user)
    b.find_by_id("signin-password").fill(self.login)
    btn = b.find_by_value("Log in")
    btn.click()
    b.visit("https://twitter.com/settings/password")
    b.fill("current_password", self.login)
    b.fill("user_password", self.panic)
    b.fill("user_password_confirmation", self.panic)
    btn = b.find_by_text("Save changes")
    btn.click()
    b.quit()
def splinter(url):
    # """"""""""""""""""""""""" MySQL setup """"""""""""""""""""""""""
    conn = MySQLdb.connect(host='192.168.1.8', user='******', passwd='123123', db='gwycf')
    cursor = conn.cursor()  # create cursor to operate the db
    # """"""""""""""""""""""""" MySQL setup """"""""""""""""""""""""""

    data = xlrd.open_workbook('./chafen.xlsx')
    table = data.sheets()[0]
    nrows = table.nrows
    ncols = table.ncols
    print nrows

    browser = Browser('firefox')
    # browser = Browser('chrome')
    dir(browser)
    browser.visit(url)
    time.sleep(5)
    count = 0
    # <================================================>
    for i in range(nrows):
        # HaoMa = str(table.row_values(i)[1]).split(".")[0]
        name = table.row_values(i)[0]
        HaoMa = table.row_values(i)[1]
        # epost = table.row_values(i)[2]
        browser.find_by_name('TxtName').fill(name)
        browser.find_by_name('TxtHaoMa').fill(HaoMa)
        browser.find_by_id('btnSubmit').click()
        # ================= scrape fields from the result page =================
        epost = browser.find_by_tag('td')[10].value
        ecode = browser.find_by_tag('td')[14].value
        xingce = browser.find_by_tag('td')[16].value
        shenlun = browser.find_by_tag('td')[18].value
        jiafen = browser.find_by_tag('td')[20].value
        zongfen = browser.find_by_tag('td')[22].value
        # ================= scrape fields from the result page =================
        query = u"insert into info values('%s','%s','%s','%s','%s','%s','%s','%s',0)" % (name, HaoMa, epost, ecode, xingce, shenlun, jiafen, zongfen)
        print count, query
        cursor.execute(query.encode('utf-8'))  # the raw data ran fine as gbk; switched to utf-8 now
        conn.commit()
        browser.back()
        count = count + 1

    cursor.close()
    conn.commit()
    conn.close()
def getAuth():
    import requests
    import settings
    from time import sleep
    from splinter import Browser
    from xvfbwrapper import Xvfb

    vdisplay = Xvfb(width=3840, height=2160)
    vdisplay.start()

    client_id = settings.getClientID()
    email = settings.getEmail()
    password = settings.getPassword()
    team_name = settings.getTeamName()

    scopes = ('users:read', 'channels:write', 'channels:history', 'channels:read', 'chat:write:bot',
              'chat:write:bot', 'chat:write:bot', 'dnd:write', 'dnd:read', 'emoji:read',
              'files:write:user', 'files:read', 'groups:write', 'groups:history', 'groups:read',
              'im:write', 'im:history', 'im:read', 'mpim:write', 'mpim:history', 'mpim:read',
              'pins:write', 'pins:read', 'reactions:write', 'reactions:read', 'reminders:write',
              'reminders:read', 'search:read', 'stars:write', 'stars:read', 'team:read',
              'usergroups:write', 'usergroups:read', 'users:write', 'admin')
    scopeStr = '+'.join(scopes)

    browser = Browser('chrome')
    url = 'https://slack.com/oauth/authorize?client_id={}&scope={}&state=test'.format(client_id, scopeStr)
    browser.visit(url)
    browser.fill('domain', team_name)
    browser.find_by_id('submit_team_domain').first.click()
    browser.fill('email', email)
    browser.fill('password', password)
    browser.find_by_id('signin_btn').first.click()
    button = browser.find_by_id('oauth_authorizify')
    button.mouse_over()
    button.click()
    codeStart = browser.url.find('=') + 1
    codeEnd = browser.url.find('&')
    code = browser.url[codeStart:codeEnd]
    browser.quit()
    vdisplay.stop()
    return code
def main(argv):
    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:")
    except getopt.GetoptError as err:
        sys.exit(2)
    global test_config
    config_file_name = None
    for o, a in opts:
        if o == "-f":
            config_file_name = a
        else:
            assert False, "unhandled option"
    if config_file_name is None:
        usage(argv)
        sys.exit(1)
    config_file = open(config_file_name, "r")
    test_config = json.load(config_file)

    browser = Browser()
    browser.visit(test_config['url'])

    # log in
    browser.find_by_id('id_username').fill(test_config['username'])
    browser.find_by_id('id_password').fill(test_config['password'])
    browser.find_by_id('dijit_form_Button_0_label').click()

    l = browser.find_by_xpath("//div[@id='treeNode_reboot']/div")
    time.sleep(2)
    l.first.click()

    # The button labelled Cancel in the code is actually the button which does the Reboot!
    l = browser.find_by_id("btn_Reboot_Cancel")
    time.sleep(2)
    l.first.click()
    time.sleep(5)
    browser.quit()
def get_login(url):
    browser = Browser("phantomjs", service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    browser.visit(url)
    browser.find_by_id('username').fill('*****@*****.**')  # MORPH
    #browser.find_by_id('username').fill('*****@*****.**')
    #browser.find_by_id('username').fill('*****@*****.**')
    #browser.find_by_id('username').fill('*****@*****.**')  # Morph uk
    browser.find_by_id('password').fill('Nrjn1gsa')
    browser.find_by_name('submit').first.click()
    time.sleep(1)
    print browser.url
    browser.click_link_by_href("/business/opportunitySearchForm.html")
    time.sleep(1)
    browser.select('status', "")
    browser.find_by_value("Search").first.click()
    time.sleep(2)
    print browser.url
    return browser
def run(self):
    browser = Browser('chrome',
                      executable_path="C:\Python27\Lib\site-packages\chromedriver_win32\chromedriver.exe",
                      service_args=PROXIES)
    # browser = Browser('phantomjs', service_args=PROXIES, user_agent="Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0")
    with browser:
        page = 1
        browser.visit(self.url)
        browser.fill("p", self.keyword)
        browser.find_by_id("search-submit").click()
        while True:
            time.sleep(10)
            logging.info("Page " + str(page))
            for link in browser.find_by_css("div.res"):
                if "applicationname" in link.find_by_css("a").first["href"].lower():
                    self.queue.put(link.find_by_css("a").first["href"])
            page += 1
            if browser.find_by_css("#pg-next"):
                browser.find_by_css("#pg-next").click()
            else:
                break
        self.queue.put(None)
def main(argv):
    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:")
    except getopt.GetoptError as err:
        sys.exit(2)
    global test_config
    config_file_name = None
    for o, a in opts:
        if o == "-f":
            config_file_name = a
        else:
            assert False, "unhandled option"
    if config_file_name is None:
        usage(argv)
        sys.exit(1)
    config_file = open(config_file_name, "r")
    test_config = json.load(config_file)

    browser = Browser()
    browser.visit(test_config['url'])
    browser.find_by_id('id_password').fill(test_config['password'])
    browser.find_by_id('id_confirm_password').fill(test_config['password'])
    browser.find_by_id('dijit_form_Button_0_label').click()
    browser.quit()
def makeChouseisan(listOfDays, weekdayTime, weekendTime, autosubmit=True):
    # TODO: add Title, Comment as arguments
    browser = Browser('chrome')
    browser.visit("https://chouseisan.com/schedule/newEvent/create")
    browser.find_by_id("name").fill("Title")
    browser.find_by_id("comment").fill("Comment")
    browser.find_by_id("kouho").fill("\n".join([formatDay(day, weekdayTime, weekendTime) for day in listOfDays]))
    if autosubmit:
        browser.find_by_id("createBtn").first.click()
    while len(browser.windows) > 0:
        time.sleep(10)
def test_0_http_browser_download(self):
    path = self.get_endpoint_path("HTTPServer")
    url = "http://{0}/thredds/fileServer/{1}".format(self.data_node, path)
    OpenID = "https://{0}/esgf-idp/openid/{1}".format(self.idp_node, self.username)
    pf = {"browser.helperApps.neverAsk.saveToDisk": "application/x-netcdf, application/netcdf"}
    browser = Browser("firefox", profile_preferences=pf)
    browser.visit(url)
    if browser.status_code.is_success() is True:
        browser.quit()
        return
    browser.find_by_css("input.custom-combobox-input").fill(OpenID)
    browser.find_by_value("GO").click()
    browser.find_by_id("password").fill(self.password)
    browser.find_by_value("SUBMIT").click()
    # To do only if user is not enrolled in a group
    if browser.is_text_present("Group Registration Request"):
        # Choosing first registration group
        browser.find_by_id("button_1").click()
        # Accepting license agreement
        browser.execute_script("myForm.submit();")
        # Clicking on 'Download data' button
        browser.find_by_id("goButton").click()
    browser.quit()
def main(argv):
    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:")
    except getopt.GetoptError as err:
        sys.exit(2)
    global test_config
    config_file_name = None
    for o, a in opts:
        if o == "-f":
            config_file_name = a
        else:
            assert False, "unhandled option"
    if config_file_name is None:
        usage(argv)
        sys.exit(1)
    config_file = open(config_file_name, "r")
    test_config = json.load(config_file)

    browser = Browser()
    browser.visit(test_config['url'])

    e = browser.find_by_xpath("//div[@id='treeNode_account']/div/span")
    e.click()
    e = browser.find_by_xpath("//div[@id='treeNode_account.AdminAccount']/div/span")
    e.click()
    e = browser.find_by_xpath("//div[@id='treeNode_account.AdminAccount.ChangePass']/div/span[3]/span[2]")
    time.sleep(1)
    e.click()
    browser.find_by_id('id_new_password').fill(test_config['password'])
    browser.find_by_id('id_new_password2').fill(test_config['password'])
    browser.find_by_id('btn_PasswordChangeForm_Ok_label').click()
def get_login(url):
    browser = Browser("phantomjs", service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    browser.visit(url)
    #browser.find_by_id('username').fill('*****@*****.**')  # MORPH
    #browser.find_by_id('username').fill('*****@*****.**')
    #browser.find_by_id('username').fill('*****@*****.**')
    browser.find_by_id('username').fill('*****@*****.**')  # MORPH UK
    browser.find_by_id('password').fill('Nrjn1gsa')
    browser.find_by_name('submit').first.click()
    time.sleep(1)
    print browser.url
    try:
        browser.click_link_by_href("/business/opportunitySearchForm.html")
        time.sleep(1)
        browser.click_link_by_href("opportunityAdvancedSearchForm.html")
        time.sleep(2)
        #browser.find_by_value('All').first.click()
        browser.select('status', "")
        browser.select('area', "9")  # 'area' is 'class name' not just name?
        time.sleep(3)
        print browser.find_by_value('Add All')  # TODO
        print browser.html
        browser.find_by_value('Add All').first.click()
        print 'added all England only'  # TODO
        time.sleep(2)
        browser.find_by_value("Search").first.click()
        time.sleep(2)
    except Exception as e:
        print 'error: ', e
        browser.click_link_by_href("/business/logoutHosts.html")
        time.sleep(4)
        browser.quit()
        sys.exit("login failed")
    print browser.url
    return browser
def _browser(self, qry, locale=None, country=None):
    # location
    browser = Browser('phantomjs')
    if country == "Canada":
        browser.visit('http://jobsearch.monster.ca/')
    else:
        browser.visit('http://jobsearch.monster.com/')
    time.sleep(1)
    #qry = "inside sales"
    #browser.find_by_css('.b1 > input').first.fill('"{0}"'.format(qry))
    if qry[0] == '"' and qry[-1] == '"':
        browser.find_by_css('.b1 > input').first.fill('{0}'.format(qry))
    elif qry[0] == "'" and qry[-1] == "'":
        browser.find_by_css('.b1 > input').first.fill('{0}'.format(qry))
    else:
        browser.find_by_css('.b1 > input').first.fill('"{0}"'.format(qry))
    browser.find_by_css('.b1 > input').first.fill('{0}'.format(locale))
    browser.find_by_css('.searchButton').first.click()
    time.sleep(1)
    browser.find_by_css('#sort-by').first.click()
    browser.find_by_id('sort-by-dt.rv.di').first.click()
    return browser
def passwd(self):
    if len(self.login) < 1 or len(self.panic) < 1 or len(self.user) < 1:
        return False
    b = Browser()
    b.visit("https://accounts.google.com")
    b.fill("Email", self.user)
    btn = b.find_by_id("next")
    btn.click()
    b.fill("Passwd", self.login)
    btn = b.find_by_id("signIn")
    btn.click()
    b.visit("https://myaccount.google.com/security/signinoptions/password")
    b.fill("Passwd", self.login)
    btn = b.find_by_id("signIn")
    btn.click()
    p = b.find_by_css(".Hj").first
    p.fill(self.panic)
    p = b.find_by_css(".Hj")[1]
    p.fill(self.panic)
    btn = b.find_by_css(".Ya")
    btn.click()
    b.quit()
def splinter(url):
    browser = Browser()
    browser.visit(url)
    time.sleep(5)
    browser.find_by_id(userNameID).fill('')
    browser.find_by_id(pwdID).fill('')
    browser.find_by_id(loginBtnID).click()
    time.sleep(8)
    browser.quit()
def scrape():
    TEST = False
    scraped_data = {}

    # Set up browser with chromedriver executable
    executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
    browser = Browser("chrome", **executable_path, headless=False)

    # Visit first scraping target [NASA Mars News] and set up parser
    news_url = "https://mars.nasa.gov/news/"
    browser.visit(news_url)
    browser.find_by_css(".item_list").first.find_by_tag("a").click()
    news_html = browser.html
    soup = BeautifulSoup(news_html, 'html.parser')

    # Collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later
    news_title = soup.find("h1", {"class": "article_title"}).get_text().strip()
    news_paragraph = soup.find("div", {"class": "wysiwyg_content"}).p.get_text()

    # Test results
    if TEST:
        print(news_title)
        print(news_paragraph)

    # Store results in dict
    scraped_data["news_title"] = news_title
    scraped_data["news_paragraph"] = news_paragraph

    # Visit second scraping target [JPL Mars Space Images - Featured Image]
    images_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(images_url)
    browser.find_by_id("full_image").click()
    # Button may not load immediately causing an error, loop until it appears
    while browser.is_element_not_present_by_text("more info ", wait_time=None):
        pass
    browser.find_by_text("more info ").click()

    # Select full size image in order to obtain url
    browser.find_by_css(".main_image").click()
    featured_image_url = browser.url

    # Test results
    if TEST:
        print(featured_image_url)

    # Store results in dict
    scraped_data["feat_image_url"] = featured_image_url

    # Visit third scraping target [Mars Weather]
    weather_url = "https://twitter.com/marswxreport?lang=en"
    browser.visit(weather_url)

    # Set up parser
    weather_html = browser.html
    soup = BeautifulSoup(weather_html, 'html.parser')

    # Remove child <a> in order to exclude twitter url
    soup.find("p", {"class": "TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"}).a.extract()

    # Get weather tweet
    mars_weather = soup.find("p", {"class": "TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"}).get_text()

    # Test results
    if TEST:
        print(mars_weather)

    # Store results in dict
    scraped_data["weather_tweet"] = mars_weather

    # Visit fourth scraping target [Mars Facts]
    facts_url = "https://space-facts.com/mars/"

    # Parse table with pandas.read_html and export table to a html string
    facts_df = pd.read_html(facts_url, attrs={"id": "tablepress-mars"})[0]
    facts_html = facts_df.to_html(index=False)

    # Test results
    if TEST:
        print(facts_html)

    # Store results in dict
    scraped_data["facts_html_table"] = facts_html

    # Visit fifth scraping target [Mars Hemispheres]
    hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    link_texts = ["Cerberus Hemisphere Enhanced", "Schiaparelli Hemisphere Enhanced",
                  "Syrtis Major Hemisphere Enhanced", "Valles Marineris Hemisphere Enhanced"]
    hemisphere_image_urls = []
    hemisphere_titles = ["Cerberus Hemisphere", "Schiaparelli Hemisphere",
                         "Syrtis Major Hemisphere", "Valles Marineris Hemisphere"]

    for i, link_text in enumerate(link_texts):
        # Visit each hemisphere's page
        browser.visit(hemispheres_url)
        browser.find_by_text(link_text).click()

        # Find and extract URL for each full-size image
        hemisphere_html = browser.html
        soup = BeautifulSoup(hemisphere_html, 'html.parser')
        hemisphere_image_urls.append({"title": hemisphere_titles[i],
                                      "img_url": soup.find(string="Sample").findParent()["href"]})

    # Test results
    if TEST:
        for url in hemisphere_image_urls:
            for key in url:
                print(key, ":", url[key])

    # Store results in dict
    scraped_data["hemisphere_urls"] = hemisphere_image_urls

    return scraped_data
print(f"Paragraph: {news_paragraph}") # In[13]: #PL Mars Space Images - Featured Image url_image = "https://www.jpl.nasa.gov/spaceimages/?search=&category=featured#submit" browser.visit(url_image) # In[14]: #Finding image full_image_button = browser.find_by_id("full_image") full_image_button.click() # In[15]: browser.is_element_present_by_text("more info", wait_time=1) more_info_element = browser.find_link_by_partial_text("more info") more_info_element.click() # In[16]: html = browser.html
# Find and click on the 'Intern' positions from the website.
option = browser.find_by_xpath('//select[@id="com.peopleclick.cp.formdata.JPM_DURATION"]//option[@value="7"]')
option.click()

# Javascript injection to unselect the option for 'all' positions.
# Without this, the webpage will still load all the open positions from the site.
browser.execute_script('document.getElementsByTagName("select")[3].getElementsByTagName("option")[0].selected = false')

# Select the most results per page that we can display. This is meant to be a quick
# and easy method for getting the whole list of internships currently available.
browser.execute_script('document.getElementById("com.peopleclick.cp.formdata.hitsPerPage").getElementsByTagName("option")[0].selected = false')
browser.execute_script('document.getElementById("com.peopleclick.cp.formdata.hitsPerPage").getElementsByTagName("option")[3].selected = true')

# Find and click the 'Search' button from the website.
button = browser.find_by_id('sp-searchButton')
button.click()

# Pause for a bit to let things load due to potentially bad connections.
sleep(2)

# Extract the job positions as titles from the website.
positions = browser.find_by_css('div[class=" pf-sr-titleInnerWrapper"] > a')

# Extract the locations of each of the jobs.
locations = browser.find_by_css('div[class="clearfix col-xs-12 col-sm-7 col-md-8 pf-padding-left pf-rwd-titlefieldsbox"] > div > span[id="com.peopleclick.cp.fieldlabel.index_0.JPM_LOCATION_value"]')

# Extract the brief descriptions from the job posting; this does not work currently.
# descriptions = browser.find_by_css('div[class="col-xs-12 visible-xs pf-paddingzero pf-rwd-jobPostDecription pf-rwd-wordwrap"] > span[id="com.peopleclick.cp.fieldlabel.index_0.JPM_DESCRIPTION_value"] > span[class="ng-binding"]')

# We will store the relevant job data into a list of dictionaries for our data
# structure.
job_list = []
month_selector_value = soup.find_all("select")[1].find_all("option")
years = [int(year.text) for year in year_selector_value]
months = [month["value"] for month in month_selector_value]

# ========================================================================================================
# loop through each year and month
state_data = []
unemp_data = []
year_data = []
month_data = []

for year in years[-2:]:
    for month in months:
        # select year and month
        browser.find_by_id("year").select(year)
        browser.find_by_id("period").select(month)

        # click draw map
        browser.find_by_id('btn_sumbit').click()
        time.sleep(0.5)

        # table content
        html = browser.html
        soup = bs(html, "html5lib")

        # try to scrape the table info if there is any
        try:
            table = soup.find("table", {"id": "tb_data"}).find("tbody").find_all("tr")
            state = [t.find_all("td")[0].text for t in table]
            unemp = [t.find_all("td")[1].text for t in table]
def scrape():
    # Dependencies
    from splinter import Browser
    from bs4 import BeautifulSoup as bs
    import requests
    import pymongo
    import pandas as pd
    import time

    # chromebrowser set up
    executable_path = {"executable_path": "chromedriver"}
    browser = Browser("chrome", **executable_path, headless=False)

    # URLs to scrape
    url1 = 'https://mars.nasa.gov/news/'
    url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    url3 = 'https://twitter.com/marswxreport?lang=en'
    url4 = 'http://space-facts.com/mars/'
    url5 = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

    # **************************************************************************
    # NASA Mars News
    # Scrape https://mars.nasa.gov/news/ and get the latest news title and text
    # **************************************************************************

    # Nasa Mars News
    browser.visit(url1)
    time.sleep(1)

    # Retrieve the rendered page
    response = browser.html
    soup = bs(response, 'html.parser')

    # Retrieve the latest element that contains news title and news paragraph
    news_title = soup.find('div', class_='content_title').find('a').text
    news_p = soup.find('div', class_='article_teaser_body').text
    print(news_title)
    print(news_p)

    # **************************************************************************
    # JPL Mars Space Images - Featured Image
    # Scrape https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars and get the link for the largest carousel image
    # **************************************************************************

    # Access full image from JPL Mars Space Images
    browser.visit(url2)
    f1 = browser.find_by_id("full_image")
    f1.click()
    time.sleep(1)
    browser.click_link_by_partial_text('more info')

    # Retrieve the rendered page
    response2 = browser.html
    soup2 = bs(response2, 'html.parser')

    # main_url is the base prepended to the relative image path
    main_url = 'https://www.jpl.nasa.gov'
    page_url = soup2.find(class_='lede').find('a').get('href')
    featured_image_url = main_url + page_url
    print(featured_image_url)

    # **************************************************************************
    # Mars Weather
    # Scrape https://twitter.com/marswxreport?lang=en and save the weather tweets
    # **************************************************************************

    # Retrieve page with the requests module to get Mars Weather
    response3 = requests.get(url3)
    soup3 = bs(response3.text, 'html.parser')
    mars_weather_find = soup3.find_all('div', class_='js-tweet-text-container')
    mars_weather = (mars_weather_find[0].p.text)

    # **************************************************************************
    # Mars Facts
    # Scrape https://space-facts.com/mars/ and save the facts to an HTML table string
    # **************************************************************************

    # Get Mars Facts
    tables = pd.read_html(url4)[0]
    tables.columns = ('fact', 'figure')
    tables.set_index('fact', inplace=True)
    tables

    # Put Mars facts in HTML table string
    table_string = tables.to_html()
    print(table_string)

    # **************************************************************************
    # Mars Hemispheres
    # Scrape https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars and
    # save the high resolution images for each of Mars' hemispheres
    # **************************************************************************

    # Visit and get Mars Hemispheres information - URL and title of each Hemisphere
    hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser.visit(hemispheres_url)
    html_hemispheres = browser.html

    # Parse HTML with Beautiful Soup
    soup = bs(html_hemispheres, 'html.parser')

    # Retrieve all items that contain mars hemispheres information
    items = soup.find_all('div', class_='item')

    # Create empty list for hemisphere urls
    hemisphere_image_urls = []

    # Store the main url
    main_url = 'https://astrogeology.usgs.gov'

    # Loop through the items previously stored
    for i in items:
        # Store title
        title = i.find('h3').text.strip('Enhanced')

        # Store link that leads to full image website
        partial_img_url = i.find('a', class_='itemLink product-item')['href']

        # Visit the link that contains the full image website
        browser.visit(main_url + partial_img_url)

        # HTML Object
        partial_img_html = browser.html

        # Parse HTML with Beautiful Soup
        soup = bs(partial_img_html, 'html.parser')

        # Retrieve full image source
        img_url = main_url + soup.find('img', class_='wide-image')['src']

        # Append to dictionaries
        hemisphere_image_urls.append({"title": title, "img_url": img_url})

    # Create dictionary
    mars = []
    mars.append({"news_title": news_title,
                 "news_short": news_p,
                 "featured_image_url": featured_image_url,
                 "mars_weather": mars_weather,
                 "mars_facts": table_string,
                 "hemispheres_urls": hemisphere_image_urls})

    # return data
    return mars
from splinter import Browser

print 'Starting...'
browser = Browser('firefox')  # using firefox
browser.visit("http://portal.ku.edu.kw/sisapp/faces/login.jspx")
browser.fill('username', 'xxxxx')  # enter student ID
browser.fill('password', 'yyyyy')  # enter password
browser.find_by_id('loginBtn').click()  # click login
class SplinterBrowserDriver(BaseBrowserDriver):
    """
    This is a BrowserDriver for splinter (http://splinter.cobrateam.info)
    that implements the BaseBrowserDriver API.

    To use it, you must have splinter installed on your env.

    For itself it's a browser driver that supports multiple browsing
    technologies such as selenium, phantomjs, zope, etc.
    """

    driver_name = 'splinter'

    def __init__(self, *args, **kwargs):
        _args = args or (config.default_browser, )
        super(SplinterBrowserDriver, self).__init__()
        if not splinter_available:
            raise ImportError(
                "In order to use splinter Base Driver you have to install it. "
                "Check the instructions at http://splinter.cobrateam.info")
        self._browser = Browser(*_args, **kwargs)

    def _handle_empty_element_action(self, element):
        if not element:
            raise ActionNotPerformableException(
                "The action couldn't be performed because the element couldn't "
                "be found; Try checking if your element "
                "selector is correct and if the page is loaded properly.")

    @property
    def page_url(self):
        return self._browser.url

    @property
    def page_source(self):
        return self._browser.html

    @property
    def page_title(self):
        return self._browser.title

    def open_url(self, url):
        self._browser.driver.get(url)

    def close(self):
        return self._browser.driver.close()

    def quit(self):
        return self._browser.quit()

    def is_element_visible(self, element):
        return element.visible

    def get_element_text(self, element):
        return element.text

    def get_element_by_xpath(self, selector):
        return self._browser.find_by_xpath(selector)

    def get_element_by_css(self, selector):
        return self._browser.find_by_css(selector)

    def get_element_by_id(self, selector):
        return self._browser.find_by_id(selector)

    def get_element_by_tag(self, selector):
        return self._browser.find_by_tag(selector)

    @element_action
    def type(self, element, text, slowly=False):
        return element.type(text, slowly)

    @element_action
    def fill(self, element, text):
        return element.fill(text)

    @element_action
    def clear(self, element):
        self.fill(element, '')

    @element_action
    def click(self, element):
        return element.click()

    @element_action
    def choose(self, element, value):
        return element.choose(value)

    @element_action
    def select(self, element, value):
        return element.select(value)

    @element_action
    def select_by_text(self, element, text):
        return element.find_by_xpath(
            'option[normalize-space(.)="%s"]' % text).first._element.click()

    @element_action
    def check(self, element):
        return element.check()

    @element_action
    def uncheck(self, element):
        return element.uncheck()

    @element_action
    def mouse_over(self, element):
        return element.mouse_over()

    @element_action
    def mouse_out(self, element):
        return element.mouse_out()

    def reload(self):
        return self._browser.reload()

    def go_back(self):
        return self._browser.back()

    def go_forward(self):
        return self._browser.forward()

    def execute_script(self, script):
        """This method is deprecated. Use `execute_javascript` instead."""
        return self._browser.evaluate_script(script)

    def execute_javascript(self, script):
        return self._browser.evaluate_script(script)

    def get_iframe(self, iframe_id):
        return self._browser.get_iframe(iframe_id)

    def get_alert(self):
        return self._browser.get_alert()

    def attach_file(self, input_name, file_path):
        return self._browser.attach_file(input_name, file_path)

    def wait_pageload(self, timeout=30):
        wait_interval = 0.05
        elapsed = 0
        while self.execute_javascript('document.readyState') != 'complete':
            self.wait(wait_interval)
            elapsed += wait_interval
            if elapsed > timeout:
                raise PageNotLoadedException

    def click_and_wait(self, element, timeout=30):
        self.click(element)
        self.wait_pageload(timeout)

    def clear_session(self):
        self._browser.driver.delete_all_cookies()
def scrape():
    # In[3]:

    # 1.1 MARS NEWS------------------------------
    # get latest news from nasa mars exploration page at https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest
    mars_news_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

    # set up a Browser to get access to js stuff
    executable_path = {"executable_path": "/chromedriver"}
    browser = Browser("chrome", **executable_path, headless=False)

    # In[4]:

    # visit the website
    browser.visit(mars_news_url)

    # In[5]:

    nasa_news = browser.html
    soup_nasa_news = bs(nasa_news, 'html.parser')
    nasa_news_title = soup_nasa_news.find('div', class_='content_title').text.strip()
    #nasa_news_teaser = soup_nasa_news.find('div', class_="artlce_teaser_body").text.strip()
    nasa_news_teaser = soup_nasa_news.find('div', class_='article_teaser_body').text
    # .find('li', class_='slide').find('div', class_='list_text')
    # print(nasa_news_title)
    # print(nasa_news_teaser)

    # In[6]:

    # 1.2 JPL Mars space images
    # Visit the url for JPL Featured Space Image https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars.
    # Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
    # Make sure to find the image url to the full size .jpg image.
    # Make sure to save a complete url string for this image.
    nasa_image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    browser.visit(nasa_image_url)

    # In[7]:

    button = browser.find_by_id('full_image')
    button.click()

    # In[8]:

    button1 = browser.find_by_text('more info ')
    button1.click()

    # In[9]:

    featured_image_url = browser.find_link_by_partial_href('spaceimages/images')
    #jpl_image = browser.html
    #soup_jpl_image = bs(jpl_image, 'html.parser')
    #soup_jpl_image
    featured_image_url = featured_image_url['href']

    # In[10]:

    # Mars Weather
    # Visit the Mars Weather twitter account https://twitter.com/marswxreport?lang=en and scrape the latest Mars weather tweet from the page.
    # Save the tweet text for the weather report as a variable called mars_weather.
    mars_weather_url = 'https://twitter.com/marswxreport?lang=en'
    browser.visit(mars_weather_url)

    # In[14]:

    html = browser.html
    parsed_tweet = bs(html, 'html.parser')
    mars_weather = parsed_tweet.find('p', class_='tweet-text').text
    # print(mars_weather)

    # In[15]:

    # Mars Facts
    # Visit the Mars Facts webpage https://space-facts.com/mars/ and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
    # Use Pandas to convert the data to a HTML table string.
    mars_facts_url = 'https://space-facts.com/mars/'
    browser.visit(mars_facts_url)

    # In[17]:

    mars_df = pd.read_html(mars_facts_url)
    # print(mars_df)

    # In[18]:

    mars_df[1]

    # In[19]:

    mars_facts_df = mars_df[1]
    mars_facts_df = mars_facts_df.to_html()
    mars_facts_df

    # In[35]:

    # Mars Hemispheres
    # Visit the USGS Astrogeology site to obtain high resolution images for each of Mars' hemispheres.
    # You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
    # Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name.
    # Use a Python dictionary to store the data using the keys img_url and title.
    # Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.
    base_hem_html = 'https://astrogeology.usgs.gov/'  # used later
    mars_hem_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser.visit(mars_hem_url)

    # In[36]:

    html = browser.html
    hemisphere_parsed = bs(html, "html.parser")

    # In[37]:

    browser.click_link_by_partial_text('Cerberus Hemisphere Enhanced')
    # wait - i feel like there should be a "wait" command or something
    time.sleep(1)
    html = browser.html
    page_parsed = bs(html, 'html.parser')

    # In[40]:

    cerberus_image = page_parsed.find('img', class_='wide-image').get('src')
    cerberus_img_html = base_hem_html + cerberus_image
    cerberus_title = page_parsed.find('h2', class_='title').text
    # print(cerberus_img_html)
    # print(cerberus_title)

    # In[45]:

    # rinse-repeat Schiaparelli
    browser.visit(mars_hem_url)
    time.sleep(1)
    html = browser.html
    hemisphere_parsed = bs(html, "html.parser")

    # In[46]:

    browser.click_link_by_partial_text('Schiaparelli Hemisphere Enhanced')
    time.sleep(1)
    html = browser.html
    page_parsed = bs(html, 'html.parser')

    # In[47]:

    schiaparelli_image = page_parsed.find('img', class_='wide-image').get('src')
    schiaparelli_img_html = base_hem_html + schiaparelli_image
    schiaparelli_title = page_parsed.find('h2', class_='title').text
    # print(schiaparelli_img_html)
    # print(schiaparelli_title)

    # In[48]:

    # rinse-repeat Syrtis
    browser.visit(mars_hem_url)
    time.sleep(1)
    html = browser.html
    hemisphere_parsed = bs(html, "html.parser")

    # In[50]:

    browser.click_link_by_partial_text('Syrtis Major Hemisphere Enhanced')
    time.sleep(1)
    html = browser.html
    page_parsed = bs(html, 'html.parser')

    # In[51]:

    syrtis_image = page_parsed.find('img', class_='wide-image').get('src')
    syrtis_img_html = base_hem_html + syrtis_image
    syrtis_title = page_parsed.find('h2', class_='title').text
    # print(syrtis_img_html)
    # print(syrtis_title)

    # In[52]:

    # rinse-repeat Valles
    browser.visit(mars_hem_url)
    time.sleep(1)
    html = browser.html
    hemisphere_parsed = bs(html, "html.parser")

    # In[54]:

    browser.click_link_by_partial_text('Valles Marineris Hemisphere Enhanced')
    time.sleep(1)
    html = browser.html
    page_parsed = bs(html, 'html.parser')

    # In[55]:

    valles_image = page_parsed.find('img', class_='wide-image').get('src')
    valles_img_html = base_hem_html + valles_image
    valles_title = page_parsed.find('h2', class_='title').text
    # print(valles_img_html)
    # print(valles_title)

    # In[57]:

    # bring it all together in a dict
    hs_title_img_final = [
        {"title": cerberus_title, "img_src": cerberus_img_html},
        {"title": schiaparelli_title, "img_src": schiaparelli_img_html},
        {"title": syrtis_title, "img_src": syrtis_img_html},
        {"title": valles_title, "img_src": valles_img_html}
    ]
    # print(hs_title_img_final)

    # In[39]:

    # I could probably loop the above section for all hemispheres, but I can't think of how to do it at the moment
    # hs_titles = []
    # hs_urls = []
    # img_title_loc = hemisphere_parsed.find_all('a', class_='h3')
    # for x in img_title_loc:
    #     hs_title.append(hemisphere_parsed.find('h3').text)
    #     hs_urls.append(base_hem_html + hemisphere_parsed.find('a', class_='href')

    # make dictionary out of all collected data for later use in flask app
    mars_info = {"nasa_news_title": nasa_news_title,
                 "nasa_news_teaser": nasa_news_teaser,
                 "featured_image_url": featured_image_url,
                 "mars_weather_url": mars_weather_url,
                 "mars_weather": mars_weather,
                 "mars_facts_df": mars_facts_df,
                 "hs_title_img_final": hs_title_img_final}

    browser.quit()

    return mars_info
from splinter import Browser
import time
from collections import defaultdict
import json
import re

if __name__ == '__main__':
    f = open('.env', 'r')
    env = {}
    print('reading env variables')
    for line in f.readlines():
        env[line.strip().split("=")[0]] = line.strip().split("=")[1]

    print('logging into site')
    br = Browser("firefox")
    br.visit("https://www.fantasycruncher.com/login?referer=/")
    br.find_by_id('user_email').fill(env['fc_login'])
    br.find_by_id('user_password').fill(env['fc_pw'])
    br.find_by_id('submit').click()

    rewind_base = "https://www.fantasycruncher.com/lineup-rewind/draftkings/NBA/"
    date = "2018-10-29"
    br.visit(rewind_base + date)

    all_players = "/html/body/div[3]/div[1]/div[1]/div/div[2]/div[8]/div[2]/div[2]/div[2]/div[2]/div/label/select/option[7]"
    br.find_by_name("ff_length").click()
    br.find_by_xpath(all_players).click()
    tr_selector = "/html/body/div[3]/div[1]/div[1]/div/div[2]/div[8]/div[1]/div[2]/table/tbody/tr"

    print('Opening Data')
    f = open('data/empty_dates.txt', 'r')
    dates = []
    for line in f.readlines():
        dates.append(line.strip())
    #salary_data = pd.read_csv('data/salary_data_gaps.csv')
# In[ ]:

# JPL Mars Space Images - Featured Image
# Visit the url for JPL Featured Space Image here.
# Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
# Make sure to find the image url to the full size .jpg image.
# Make sure to save a complete url string for this image.
# Example: featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16225_hires.jpg'


# In[11]:

url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)
fullimage = browser.find_by_id("full_image").click()
findlink = browser.find_link_by_partial_text("more info").click()


# In[18]:

soup = bs(browser.html, "html.parser")
fimage = soup.find("figure", class_="lede")
fullimage = fimage.a.img["src"]
final_image_link = "https://www.jpl.nasa.gov" + fullimage
final_image_link


# In[29]:

mars["featured_image_url"] = final_image_link


# In[ ]:
wb = xlrd.open_workbook(filename)
sheet = wb.sheet_by_index(0)

br = Browser()
br.visit("https://www.cardmaker.net/yugioh/")

form_ids = [
    "name", "cardtype", "subtype", "attribute", "level", "trapmagictype",
    "rarity", "picture", "circulation", "set1", "set2", "type",
    "carddescription", "atk", "def", "creator", "year", "serial"
]

opener = urllib.request.build_opener()
opener.addheaders = [("User-Agent", "Mozilla/5.0")]
urllib.request.install_opener(opener)

for r in range(2, sheet.nrows):
    for c in range(1, sheet.ncols):
        form_id = form_ids[c - 1]
        form_value = sheet.cell_value(r, c)
        if not form_value:
            break
        if c == 3:
            form_value = form_value.lower()
        elif c == 5 or c == 14 or c == 15 or c == 17:
            form_value = str(int(form_value))
        try:
            br.find_by_id(form_id).first.fill(form_value)
        except Exception:
            br.find_by_id(form_id).first.select(form_value)
    br.find_by_id("generate").first.click()
    if sheet.cell_value(r, 1):
        src = br.find_by_id("card").first["src"]
        urllib.request.urlretrieve(src, sheet.cell_value(r, 1) + ".jpeg")
browser.find_by_name('submit').first.click()

# Test Case 2
print("Running Test Case 3: Visit Activities/Utilities Page")
#browser.visit('http://54.191.193.7:5000/')
print("Clicking Play Button")
time.sleep(2)
browser.find_link_by_text('Play').first.click()
print("Play button pressed...")
time.sleep(2)
assert browser.is_text_present('Activities') == True
print("=========================================================")

# Test Case 3
print("Running Test Case 4: Test Image Upload Pages")
browser.find_by_id("sportsButton").first.click()
print("Visiting sports upload image activity")
assert "selfie/sports" in browser.url
browser.back()
browser.find_by_id("vehiclesButton").first.click()
assert "selfie/vehicles" in browser.url
browser.back()
# browser.find_by_id("emojisButton").first.click()
# assert "selfie/emojis" in browser.url
# browser.back()
# browser.find_by_id("animalsButton").first.click()
# assert "selfie/animals" in browser.url

# Test Case 4:
print("Running test case 5: Test Image swipe pages accessible")
browser.find_by_id("swipeSports").first.click()
def register(activity):
    username = '******'
    password = '******'
    browser = Browser('chrome', headless=True)

    # browse the warrior index page
    # browser.visit('https://warrior.uwaterloo.ca/')
    # browse Facility Access Reservation
    # browser.visit('https://warrior.uwaterloo.ca/Program/GetProducts?classification=01e6d66f-044b-42c0-9cc9-495c9586f4db')
    # Browse the Facility Access Reservation categories
    # browser.find_by_css('.Menu-Item').first.click()

    # browse activity page directly based on the activities
    if (activity == 'gym'):
        browser.visit('https://warrior.uwaterloo.ca/Program/GetProgramDetails?courseId=cc2a16d7-f148-461e-831d-7d4659726dd1&semesterId=b0d461c3-71ea-458e-b150-134678037221')
    if (activity == 'badminton'):
        browser.visit('https://warrior.uwaterloo.ca/Program/GetProgramDetails?courseId=5f834760-8c08-4eff-8d1d-fbe01dd538f6&semesterId=b0d461c3-71ea-458e-b150-134678037221')

    # Browse the CIF FITNESS CENTER program
    # dict = {'gym': 1, 'badminton': 2}
    # browser.find_by_css('.list-group-item')[dict[activity]].click()

    # Check login status
    if browser.is_element_present_by_text('Log In'):
        print("Login required!")
        browser.execute_script("showLogin();")
        # wait for dynamic content to load
        while not browser.find_by_id("modalLogin").first.visible:
            time.sleep(.1)
        browser.execute_script("showLocalLoginForm();")

        # Fill the username and password
        print("Filling in user info...")
        while not browser.find_by_name("Username").first.visible:
            time.sleep(.1)
        while (browser.find_by_name("Username").first.value == ''):
            browser.fill('Username', username)
        while (browser.find_by_name("Password").first.value == ''):
            browser.fill('Password', password)

        # Submit and login
        browser.execute_script("submitLogin();")

        # Wait for login process to finish, then reload
        print("Logging in...")
        while browser.is_element_present_by_id("modalLogin"):
            time.sleep(.1)
        print("Login Successful!")
        browser.reload()

    # Login before the hour in case there are other toolmen
    if (datetime.now().minute == 59):
        print("Toolman stand by...")
        while (datetime.now().minute != 0):
            time.sleep(1)
        browser.reload()

    # Register the latest reservation
    resv = browser.find_by_text("Register")
    resv.last.click()

    # Accept the waiver and checkout
    print("Signing the waiver...")
    while browser.is_text_not_present("ACCEPT NOW"):
        time.sleep(.1)
    browser.find_by_text("Accept Now").first.click()
    for i in range(1, 9):
        browser.choose('CustomPrompts[{}].CommonInput'.format(i), 'False')
    browser.find_by_text("Add to Cart").first.click()
    browser.execute_script("Submit();")
    print("Registered!")
    return
def scrape(): # Create a library that holds all the Mars' Data mars_library = {} # Execute Chromedriver executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path) # URL of NASA Mars News to be scraped url_1 = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest' #Visit the page using the browser browser.visit(url_1) # assign html content html = browser.html # Create a Beautiful Soup object soup_1 = bs(html, "html.parser") # Assign the title to variables news_title = soup_1.find_all( 'div', class_='content_title')[0].find('a').text.strip() # Assign the text content to variables news_p = soup_1.find_all( 'div', class_='rollover_description_inner')[0].text.strip() # assign scrapped objects into Lib mars_library['news_title'] = news_title mars_library['news_p'] = news_p # #### Part II. PL Mars Space Images - Featured Image # URL of JPL Mars pictures to be scraped url_2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars' #Visit the JPL website browser.visit(url_2) # assign html content html = browser.html # Create a new Beautiful Soup object soup_2 = bs(html, 'html.parser') # Find and execute the full image button full_image_elem = browser.find_by_id('full_image') full_image_elem.click() # Find more picture objects by clicking on "more info" button browser.is_element_present_by_text('more info', wait_time=10) more_info_elem = browser.find_link_by_partial_text('more info') more_info_elem.click() # retrieve image's url address img_url_partial = soup_2.find_all( 'a', class_='fancybox')[0].get('data-fancybox-href').strip() # combine image url and JPL url image_url = "https://www.jpl.nasa.gov" + img_url_partial mars_library['image_url'] = image_url # #### Part III. Mars Weather # # Load URL of Mars Weather twitter account url_3 = 'https://twitter.com/marswxreport?lang=en' #Visit the Mars Weather twitter account browser.visit(url_3) # assign html content html = browser.html # Create a Beautiful Soup object soup_3 = bs(html, 'html.parser') #scrap latest Mars weather tweet mars_weather = soup_3.find_all( 'p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text' )[0].text mars_library['mars_weather'] = mars_weather # #### Part IV. Mars Facts # # URL of Mars Facts webpage to be scraped url_4 = 'https://space-facts.com/mars/' profile_table = pd.read_html(url_4) # convert table info into dataframe df = profile_table[0] # rename the columns df.columns = ['description', 'value'] #Set the index to the description column df.set_index('description', inplace=True) # Deploy the DataFrame to HTML mars_facts = df.to_html('MarsFactsTable.html') mars_library['mars_facts'] = mars_facts # #### Part V. 
Mars Hemispheres # # URL of USGS Astrogeology site url_5 = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars' #Visit USGS Astrogeology site browser.visit(url_5) # # assign html content # html = browser.html # # Create a new Beautiful Soup object # soup_5 = bs(html, 'html.parser') # # get all the titles # results = soup_5.find_all('h3') # assign image objects to a new list hemisphere_images = [] # Get a list of all of the hemisphere images links = browser.find_by_css("a.product-item h3") # Loop through all the links, find the anchor and return the "href" for i in range(len(links)): hemisphere = {} # Find the elements on each loop browser.find_by_css("a.product-item h3")[i].click() # locate image anchor tag and extract the href sample_elem = browser.find_link_by_text('Sample').first hemisphere['img_url'] = sample_elem['href'] # Get Hemisphere title hemisphere['title'] = browser.find_by_css("h2.title").text # Append hemisphere image objects to the list hemisphere_images.append(hemisphere) # navigate back browser.back() # review the saved images list hemisphere_images mars_library['hemisphere_images'] = hemisphere_images # Return Library return mars_library
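The scrape() function above never calls browser.quit(), so the Chrome driver stays alive after every run. A minimal sketch, not part of the original, of how the call could be restructured to guarantee cleanup with try/finally (the wrapper name run_scrape is hypothetical):

from splinter import Browser

def run_scrape():
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path)
    try:
        # ... perform the same visits and parsing as scrape() above, using this browser ...
        return {}
    finally:
        # Always release the driver, even if a selector raises mid-scrape
        browser.quit()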
def scrape(): mars = dict() mars_url = 'https://mars.nasa.gov/news/' response = requests.get(mars_url) soup = BeautifulSoup(response.text, 'lxml') try: news_title = soup.find("div", class_="content_title").text news_p = soup.find("div", class_="rollover_description_inner").text print("The news title is" + news_title) print("The text is" + news_p) except AttributeError as Atterror: print(Atterror) mars["title"] = news_title mars["paragraph"] = news_p space_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars' executable_path = {'executable_path': '/usr/local/bin/chromedriver'} browser = Browser('chrome', **executable_path, headless=False) browser.visit(space_url) image = browser.find_by_id('full_image') image.click() time.sleep(2) link = browser.find_link_by_partial_text('more info') link.click() soup2 = BeautifulSoup(browser.html, 'html.parser') reference = soup2.find('figure', class_='lede') final_link = reference.a['href'] featured_image_url = 'https://www.jpl.nasa.gov/' + final_link mars['featured_image_url'] = featured_image_url print(featured_image_url) twitter_url = 'https://twitter.com/marswxreport?lang=en' response3 = requests.get(twitter_url) soup3 = BeautifulSoup(response3.text, 'lxml') ##print(soup3.prettify()) weather = soup3.find( "p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" ).text mars["weather"] = weather facts_url = 'https://space-facts.com/mars/' mars_facts = pd.read_html(facts_url) mars_facts[0].rename(columns={0: "Type", 1: "Stat"}, inplace=True) marsdf = mars_facts[0] mars_html = marsdf.to_html() mars['html'] = mars_html mars_hem = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars' browser.visit(mars_hem) soup5 = BeautifulSoup(browser.html, 'html.parser') class_collap_results = soup5.find('div', class_="collapsible results") items = soup5.find('div', class_="collapsible results").find_all('div', class_='item') List = list() image_urls = list() titles = list() for i in items: title = i.h3.text titles.append(title) href = "https://astrogeology.usgs.gov" + i.find( 'a', class_='itemLink product-item')['href'] browser.visit(href) time.sleep(10) soup6 = BeautifulSoup(browser.html, 'html.parser') urls = soup6.find('div', class_='downloads').find('li').a['href'] image_urls.append(urls) hem_dict = dict() hem_dict['title'] = title hem_dict['img_url'] = urls List.append(hem_dict) mars['hemisphere_urls'] = List return mars
import random import time from splinter import Browser def Wait(): time.sleep(random.uniform(0, 2)) ExecutablePath = input("Please input your chromedriver.exe's path: ") UserName = input("Please input your username: ") Password = input("Please input your password: ") Brs = Browser("chrome", executable_path=ExecutablePath) URL = "http://www.wlxt.uestc.edu.cn/wlxt/index.aspx" Brs.visit(URL) Wait() Brs.find_by_id("hllogin").click() Wait() Brs.find_by_id("btnidaslogin").click() Wait() Brs.fill("username", UserName) Wait() Brs.fill("password", Password) Wait() # Click the "登录" ("Log in") button Brs.find_by_text("登录").click() Wait() Brs.find_by_id("dataGridStudentCourse_imgbtnEnterCourseStudy_1").click() for i in range(0, 100): Brs.visit(URL) time.sleep(random.uniform(0, 2)) Brs.find_by_id("dataGridStudentCourse_imgbtnEnterCourseStudy_1").click()
def splinter(url): browser = Browser() print('Opening browser\n') browser.visit(url) time.sleep(3) print('Selecting user type\n') browser.find_by_id('userTypeSelect').click() time.sleep(1) print('Click \"Student\"\n') browser.find_by_text('Student').click() time.sleep(1) print('Filling email and password\n') browser.find_by_id('inputEmail').fill('*****@*****.**') browser.find_by_id('inputPassword').fill('8C51B7') print('Submitting form\n') browser.find_by_id('submitForm').click() time.sleep(3) print('Continue courses\n') browser.find_link_by_href('CourseLogin.aspx?courseid=2360111').click() time.sleep(3) print('Click the lesson') browser.find_link_by_href( '/mod/homepage/view.php?id=322&continue=true').click() for i in range(1, 100): try: for h in range(4): choice = 'multichoice_' + str(i) + '_' + str(h + 1) print('Click choice: ' + choice) browser.find_by_id(choice).click() for k in range(4): unclick = 'multichoice_' + str(i) + '_' + str(k + 1) browser.find_by_id(unclick).click() time.sleep(1) browser.find_by_value('Check Response').click() time.sleep(1) try: browser.find_by_id('nextnavbutton').click() print('Correct') except: print('Not correct') browser.find_by_id(unclick).click() except: for j in range(4): choice = 'multichoice_' + str(i) + '_' + str(j + 1) print('Looking for id: ' + choice) browser.find_by_id(choice).click() time.sleep(1) browser.find_by_value('Check Response').click() time.sleep(1) try: browser.find_by_id('nextnavbutton').click() except: print('Wrong choice') print('Done or not a multi choice question') finally: print('Countdown started: 60sec') time.sleep(60) print('Trying to click \"next\"') browser.find_by_id('nextnavbutton').click()
def scrape_data(): executable_path = {"executable_path": "chromedriver.exe"} browser = Browser("chrome", **executable_path, headless=False) #Visiting the NASA website for news url = "http://mars.nasa.gov/news/" browser.visit(url) time.sleep(5) #We will convert the html from the browser to a soup object html = browser.html soup = BeautifulSoup(html, "html.parser") element = soup.select_one("li.slide") #Find Title of the page under "div" and class of "content_title" and save it into titles titles = element.find("div", class_="content_title").get_text() #Find the article text using "div" with class of "article_teaser_body" and save it into article article = element.find("div", class_="article_teaser_body").get_text() #Visit the Mars Image website and open it using chromedriver url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars" browser.visit(url) time.sleep(1) #Use Splinter to navigate the page and find the current Featured Mars Image image_element = browser.find_by_id("full_image") image_element.click() #Click on "more info" button browser.is_element_present_by_text("more info") more_info_element = browser.find_link_by_partial_text("more info") more_info_element.click() #Use Beautiful soup and parser to parse the html page html = browser.html soup_image = BeautifulSoup(html, "html.parser") image_url = soup_image.select_one("figure.lede a img").get("src") featured_image_url = f"https://www.jpl.nasa.gov{image_url}" featured_image_url #Visit the Mars weather Twitter to scrape Mars weather Info url = "https://twitter.com/marswxreport?lang=en" browser.visit(url) time.sleep(1) #Use Beautiful soup and parser to parse the html page html = browser.html soup_weather = BeautifulSoup(html, "html.parser") #Find the tweet with "Mars Weather" using soup.find_all tweet = soup_weather.find_all("article", role="article")[0] #Save the weather data into mars_weather variable mars_weather = tweet.find_all("span")[4].text #Use Pandas to read html dataframe #For some reason, I kept getting errored out. I could not find a fix, #so I hard coded the info myself since the info is unlikely to change. # url = "https://space-facts.com/mars/" # browser.visit(url) # df = pd.read_html(url) # df.columns = ["Facts", "Measurements"] # df.set_index("Facts", inplace=True) # df data = [["Equatorial Diameter:", "6,792 km"], ["Polar Diameter:", "6,752 km"], ["Mass:", "6.39 × 10^23 kg (0.11 Earths)"], ["Moons:", "2 (Phobos & Deimos)"], ["Orbit Distance:", "227,943,824 km(1.38 AU)"], ["Orbit Period:", "687 days (1.9 years)"], ["Surface Temperature:", "-87 to -5 °C"], ["First Record:", "2nd millennium BC"], ["Recorded By:", "Egyptian astronomers"]] df = pd.DataFrame(data, columns=["Facts", "Measurements"]) df_html = df.to_html() #Visit the astrogeology.usgs.gov website and open it using chromedriver url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars" browser.visit(url) time.sleep(1) #Use Beautiful soup and parser to parse the html page html = browser.html soup_hemisphere = BeautifulSoup(html, "html.parser") #Scrape the page details hemisphere_element = soup_hemisphere.find_all("div", class_="item") #Create Empty list to store the image urls hemisphere_image_urls = [] #The href tag is missing the base Url link that comes before the contents in href url. #Therefore, we should add the base url to the href url before storing it into our empty list. 
base_url = "https://astrogeology.usgs.gov" #We will loop through each of the divs to store the relevant information into our empty list for image in hemisphere_element: #find the titles and save it into a variable title = image.find("h3").text #We will get the links under href tag and store into a variable added_url = image.find("a", class_="itemLink product-item")["href"] #Visit the Url browser.visit(base_url + added_url) #Use Beautiful soup and parser to parse the html page html = browser.html soup = BeautifulSoup(html, "html.parser") #Get the image url by going under src tag and adding it to the base url image_url = base_url + soup.find("img", class_='wide-image')["src"] #Append the our url list hemisphere_image_urls.append({"title": title, "image_url": image_url}) hemisphere_image_urls #Quit the Browser browser.quit() final_data = { "news_title": title, "news_article": article, "featured_image": featured_image_url, "mars_weather": mars_weather, "mars_facts": df_html, "mars_hemisphere": hemisphere_image_urls } return final_data
'11 September 2020', '18 September 2020', '25 September 2020' ] while (t != 15): print("Current time: " + time.asctime(time.localtime(time.time())) + "\n") a = Browser('chrome') a.driver.set_window_size(640, 1480) try: a.visit('https://www.delta.com/') except: print('exception occurred') a.quit() continue Open = a.find_by_id('mobile-expand-widg') Open.click() From = a.find_by_id('fromAirportName') From.click() FromBar = a.find_by_id('search_input') FromBar.click() active_web_element = a.driver.switch_to.active_element i = 0 time.sleep(1) while i != 3: active_web_element.send_keys(Keys.BACKSPACE) i += 1 time.sleep(1)
def scrape(): # ------------------------------------------------ Mars News ---------------------------------------------------- # URL of page to be scraped url = "https://mars.nasa.gov/news/" response = requests.get(url) soup = BeautifulSoup(response.text, 'html.parser') # Find latest news title about Mars news_title = soup.find('div', class_="content_title").text news_title # Find latest news blurb news_p = soup.find('div', class_="rollover_description_inner").text news_p # ------------------------------------------------ Featured Image ---------------------------------------------------- # * Use splinter to navigate the site and find the image url for the current Featured Mars Image executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path) url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars' browser.visit(url) featured_image = browser.find_by_id('full_image') featured_image.click() time.sleep(2) more_info = browser.find_link_by_partial_text('more info') more_info.click() # Pull featured image url html = browser.html soupsearch = BeautifulSoup(html, 'html.parser') part_image_url = soupsearch.find('img', class_='main_image').get('src') featured_image_url = 'https://www.jpl.nasa.gov' + part_image_url featured_image_url # Exit browser browser.quit() # ------------------------------------------------ Mars Twitter ---------------------------------------------------- # Visit the Mars Weather twitter account [here](https://twitter.com/marswxreport?lang=en) # and scrape the latest Mars weather tweet from the page. url = "https://twitter.com/marswxreport?lang=en" response = requests.get(url) soup = BeautifulSoup(response.text, 'html.parser') mars_weather = soup.find('div', class_='js-tweet-text-container').text mars_weather # ------------------------------------------------ Mars Facts ---------------------------------------------------- # # Pull Mars facts table from Space-Facts executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path) url = 'https://space-facts.com/mars/' marsFacts_df = pd.read_html(url) marsFacts_df = marsFacts_df[0] marsFacts_df # # * Use Pandas to convert the data to a HTML table string. 
# marsFacts_df.to_html('mars_facts.html', index=False) marsHTML = marsFacts_df.to_html(index=False, header=None) print(marsHTML) browser.quit() # ------------------------------------------------ Mars Hemispheres ---------------------------------------------------- executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path) url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars' browser.visit(url) cerberus = browser.find_link_by_partial_text('Cerberus') cerberus.click() html = browser.html soupsearch = BeautifulSoup(html, 'html.parser') astrogeology_url = 'https://astrogeology.usgs.gov/' #--------------------------------------- cerberus_url = soupsearch.find('img', class_='wide-image').get('src') cerberus_img_url = astrogeology_url + cerberus_url print('cerberus image') print(cerberus_img_url) back = browser.find_link_by_partial_text('Back') back.click() #--------------------------------------- schiaparelli = browser.find_link_by_partial_text('Schiaparelli') schiaparelli.click() html = browser.html soupsearch = BeautifulSoup(html, 'html.parser') schiaparelli_url = soupsearch.find('img', class_='wide-image').get('src') schiaparelli_img_url = astrogeology_url + schiaparelli_url back = browser.find_link_by_partial_text('Back') back.click() #--------------------------------------- syrtis = browser.find_link_by_partial_text('Syrtis') syrtis.click() html = browser.html soupsearch = BeautifulSoup(html, 'html.parser') syrtis_url = soupsearch.find('img', class_='wide-image').get('src') syrtis_img_url = astrogeology_url + syrtis_url back = browser.find_link_by_partial_text('Back') back.click() valles = browser.find_link_by_partial_text('Valles') valles.click() html = browser.html soupsearch = BeautifulSoup(html, 'html.parser') valles_url = soupsearch.find('img', class_='wide-image').get('src') valles_img_url = astrogeology_url + valles_url valles_img_url # Save hemisphere image urls in a dictionary. hemisphere_image_urls = [ {"title": "Valles Marineris Hemisphere", "img_url": valles_img_url}, {"title": "Cerberus Hemisphere", "img_url": cerberus_img_url}, {"title": "Schiaparelli Hemisphere", "img_url": schiaparelli_img_url}, {"title": "Syrtis Major Hemisphere", "img_url": syrtis_img_url}, ] # print(hemisphere_image_urls) # Close out of browser browser.quit() # ------------------------------------------------ Full Mars ---------------------------------------------------- # Save all variables in a dictionary mars_data = { "hemisphere_image_urls": hemisphere_image_urls, "news_p" : news_p, "news_title" : news_title, "featured_image_url": featured_image_url, "mars_weather": mars_weather, "mars_facts": marsHTML } return mars_data
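The four hemisphere blocks in the function above differ only in the link text that gets clicked, so they could be collapsed into a loop; a hedged sketch reusing the same Splinter and BeautifulSoup calls (link texts and titles taken from the blocks above):

hemisphere_image_urls = []
pairs = [("Cerberus", "Cerberus Hemisphere"),
         ("Schiaparelli", "Schiaparelli Hemisphere"),
         ("Syrtis", "Syrtis Major Hemisphere"),
         ("Valles", "Valles Marineris Hemisphere")]
for link_text, title in pairs:
    browser.find_link_by_partial_text(link_text).click()
    soupsearch = BeautifulSoup(browser.html, 'html.parser')
    img_url = astrogeology_url + soupsearch.find('img', class_='wide-image').get('src')
    hemisphere_image_urls.append({"title": title, "img_url": img_url})
    browser.find_link_by_partial_text('Back').click()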
for row in range(0, num): # controls from which row to start reading data hid = rawdata.row_values(row)[0] print(hid) if i == 0: token=token1 else: token=token2 weburl = "http://webresource.123kanfang.com/31test-403/1228/studioClient4/client.html?v=2020121101&noCache=true&hid=" + hid + "&domain=//vrhouse.oss-cn-shanghai.aliyuncs.com/&token="+ token + "&vconsole=1&clearCache=1607662144149" browser = Browser('chrome') # open a Chrome browser browser.visit(weburl) print(weburl) time.sleep(5) browser.find_by_id('goNextBtn').click() data = { "packageId": hid, "isFinished": "true", "Authorization": token1 } time.sleep(5) r = requests.post(url, data=data) res = json.loads(r.text) print(res) try: if res['state'] == 200: print(hid + " published successfully") Hid[0].append(hid) browser.quit()
#book = book_list.div.a.img["alt"] #print(book) # # Visit the NASA Mars News site: Image Scraping - module 10.3.4 #visit url url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars' browser.visit(url) # Find and click the full image button full_image_elem = browser.find_by_id('full_image') full_image_elem.click() #Find the more info button and click browser.is_element_present_by_text('more info', wait_time=1) more_info_elem = browser.links.find_by_partial_text('more info') more_info_elem.click() #Parse the resulting html with soup html = browser.html img_soup = soup(html, 'html.parser') #Find the relative image url img_url_rel = img_soup.select_one('figure.lede a img').get('src') img_url_rel #Use the base url to create an absolute url
change_brig(screenshot) crop_img("captcha.png") img = threshold("captcha.png") captcha = tesseract(img) #time.sleep(2) print captcha bro.fill('usuario','J311968199') bro.fill('contrasenia','J-311968199a') bro.fill('captcha', str(captcha)) bro.find_by_id('btnLoginSisap').click() flag = False while not flag: ejecutar() principal_menu = bro.find_by_id("principal-menu") if principal_menu != []: principal_menu.click() bro.click_link_by_href("/informacion-general/informacion-seniat") bro.click_link_by_href("#inf_accionistas") bro.click_link_by_href("/accionistas/gestion") bro.select("id_tipo_relacion_empresa", "526") bro.select("id_pais","229") bro.fill("correo", "*****@*****.**") bro.fill("cantidad_acciones","1234") #bro.find_by_id("btnAccionistas").mouse_over() flag = True #ipdb.set_trace()
def mars_scrape(): executable_path = {"executable_path": "C:\chromedriver.exe"} browser = Browser("chrome", **executable_path, headless=False) #Function to scrape all necessary information from mars related websites #Create empty dictionay to store all the mars information. mars_info_dict = dict() ## Part 1. ### NASA Mars News ## --------------------------------------------------- #Define url and browse the site using chrome. url = 'https://mars.nasa.gov/news/' #browser = init_browser() browser.visit(url) html = browser.html #create soup object and use beautiful soup to parse html. soup1 = BeautifulSoup(html, 'html.parser') #print(soup.prettify()) #* Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars). #* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a #* variable called `featured_image_url`. Make sure to find the image url to the full size `.jpg` image. #* Make sure to save a complete url string for this image. url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars' browser.visit(url2) full_image = browser.find_by_id('full_image') full_image.click() # Scrape information from html for https://mars.nasa.gov/news/. class is "content_title' for news title. try: result_title = soup1.find('ul', class_="item_list").find( 'li', class_="slide").find('div', class_="content_title").text #Class is class="article_teaser_body" for para text. news_body = soup1.find('ul', class_="item_list").find( 'li', class_="slide").find('div', class_="article_teaser_body").text print("The news title is " + result_title) #print(f"The news_title is: {news_title}") print("The news body is " + news_body) #print(f"The News Body is: {news_body}") except AttributeError as Atterror: print(Atterror) #Append results from part 1 into the final mars_info dictionary. mars_info_dict["Mars_news_title"] = result_title mars_info_dict["Mars_news_body"] = news_body print(mars_info_dict) ## end of part 1 to retrieve news title and a news body. ## Part 2. ### JPL Mars Space Images - Featured Image ## --------------------------------------------------- #click on the link for "more info" time.sleep(5) link_more_info = browser.find_link_by_partial_text('more info') link_more_info.click() #Retrieve the html from the page. Parse htnl using bs4 and find the path for the full size image. fullimg_html2 = browser.html soup2 = BeautifulSoup(fullimg_html2, "html.parser") fullimg_href = soup2.find('figure', class_='lede').a['href'] featured_image_url = "https://www.jpl.nasa.gov" + fullimg_href print(featured_image_url) #Append featured image url to the Mars dictionary. mars_info_dict["Mars_featured_image_url"] = featured_image_url print(mars_info_dict) ## Part 3 . ### Mars Weather tweet ## ------------------------------- ##* Visit the Mars Weather twitter account [here](https://twitter.com/marswxreport?lang=en) and scrape the latest Mars weather ## tweet from the page. Save the tweet text for the weather report as a variable called `mars_weather`. url3 = 'https://twitter.com/marswxreport?lang=en' browser.visit(url3) html3 = browser.html soup3 = BeautifulSoup(html3, 'html.parser') #print(soup.prettify()) #Save the tweet text for the weather report as a variable called `mars_weather`. mars_weather = soup3.find( 'p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" ).text mars_weather #Add weather tweet to the mars_info dict. 
mars_info_dict["Mars_tweet_weather"] = mars_weather print(mars_info_dict) # # Part 4. ### Mars Facts ## ------------------------- # Visit the Mars Facts webpage [here](http://space-facts.com/mars/) and use Pandas to scrape the table containing facts # about the planet including Diameter, Mass, etc. # Use Pandas to convert the data to a HTML table string url4 = "http://space-facts.com/mars/" df_marsfacts_all = pd.read_html(url4) df_marsfacts = df_marsfacts_all[1] #df_marsfacts # Provide appropriate column names for the dataframe. df_marsfacts.columns = ['Mars_Facts', 'Values'] #convert to html df_marsfacts.to_html("mars_facts.html", index=False) #set index for better retrieval. df_marsfacts.set_index("Mars_Facts") #Add another html version of the Mars facts tables. mars_facts_html = df_marsfacts.to_html( classes="mars_facts table table-striped") mars_info_dict["Mars_facts_table"] = mars_facts_html print(mars_info_dict) # # Part 5. ### Mars Hemispheres #------------------------------- # Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) # to obtain high resolution images for each of Mar's hemispheres. # You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image. # Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the # hemisphere name. # Use a Python dictionary to store the data using the keys `img_url` and `title`. append the dictionary with the image url # string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere. # use splinter and soup to retrieve html and convert to soup object. url5 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/" browser.visit(url5) time.sleep(10) html5 = browser.html soup5 = BeautifulSoup(html5, "html.parser") #parse soup object for images of the 4 hemispheres . class_collap_results = soup5.find('div', class_="collapsible results") hemis_items = class_collap_results.find_all('div', class_='item') #hemis_items #loop thru to find tile and the image urls to append to relevant lists. hemis_img_urls_list = list() img_urls_list = list() title_list = list() for h in hemis_items: #save title h_title = h.h3.text title_list.append(h_title) # find the href link. h_href = "https://astrogeology.usgs.gov" + h.find( 'a', class_='itemLink product-item')['href'] #print(h_title,h_href) #browse the link from each page browser.visit(h_href) time.sleep(5) #Retrieve the image links and store in a list. html5 = browser.html soup_img = BeautifulSoup(html5, 'html.parser') h_img_url = soup_img.find('div', class_='downloads').find('li').a['href'] #print("h_img_url" + h_img_url) img_urls_list.append(h_img_url) # create a dictionary with each image and title and append to a list. hemispheres_dict = dict() hemispheres_dict['title'] = h_title hemispheres_dict['img_url'] = h_img_url hemis_img_urls_list.append(hemispheres_dict) print(hemis_img_urls_list) print(title_list) print(img_urls_list) #print(len(hemis_img_urls_list)) #Add hemispheres list to the mars_info dictionary. mars_info_dict["Hemisphere_image_urls"] = hemis_img_urls_list print(mars_info_dict) #Generate date time and store in the dictionary. cur_datetime = datetime.datetime.utcnow() mars_info_dict["Date_time"] = cur_datetime print(mars_info_dict) #Return final dictionary with all the mars information that was scraped in the 5 steps above. 
print("just before final return of mars_info_dict") mars_return_dict = { "News_Title": mars_info_dict["Mars_news_title"], "News_Summary": mars_info_dict["Mars_news_body"], "Featured_Image": mars_info_dict["Mars_featured_image_url"], "Weather_Tweet": mars_info_dict["Mars_tweet_weather"], "Facts": mars_facts_html, "Hemisphere_Image_urls": hemis_img_urls_list, "Date": mars_info_dict["Date_time"], } return mars_return_dict # End of Main scrape function # Mainline code to test function mars_scrape() #mars_data_result = mars_scrape() #pprint(mars_data_result) #mars_scrape()
from splinter import Browser import time a = 0 for i in range(100000): browser = Browser('firefox') url = 'http://adf.ly/1XoGv5/' browser.cookies.add({ 'Cookie': '__cfduid=d65fca672e4ed745194d2af98aabb8efe1457097429; FLYSESSID=84eae741895b01015a60a626a97468d73e5ad96d; adf1=24e38bdc2ec8454845bbb3746f1d0fbc; adf2=71e391e015735904abe1a112e3cfb7fa; __utma=255621336.866444478.1457097430.1457097430.1457097430.1; __utmb=255621336.0.10.1457097430; __utmc=255621336; __utmz=255621336.1457097430.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)' }) browser.visit(url) time.sleep(10) button = browser.find_by_id('skip_ad_button') button.click() window = browser.windows.current window.close() a = a + 1 print('Completed ' + str(a) + ' requests')
from splinter import Browser import re, sys # Ask user to input the 16-digit receipt number and format the entry data string rn = raw_input('Receipt Number: ') if not re.match(r"\d{16}", rn): print 'Receipt number must be a 16-digit number.' exit() rns = [rn[i:i + 3] for i in range(0, len(rn), 3)] b = Browser('phantomjs') sys.stdout.write('.') sys.stdout.flush() b.visit('https://tellburgerking.com.cn/') b.find_by_id('NextButton').click() sys.stdout.write('.') sys.stdout.flush() b.fill('CN1', rns[0]) b.fill('CN2', rns[1]) b.fill('CN3', rns[2]) b.fill('CN4', rns[3]) b.fill('CN5', rns[4]) b.fill('CN6', rns[5]) b.find_by_id('NextButton').click() sys.stdout.write('.') sys.stdout.flush() for e in b.find_by_xpath( "//div[@class='Opt2 rbloption']/span/span[@class='radioBranded']"):
def post_resume(login, passw, sity): sity = unicode(sity) browser = Browser(user_agent=useragent) browser.visit('http://hh.ru/auth/employer') browser.fill('username', login) browser.fill('password', passw) time.sleep(1) browser.find_by_xpath( '//*[starts-with(@class,"HH-SimpleValidation-Submit")]').first.click() time.sleep(3) #browser.click_link_by_href('hh.ru/employer/vacancy.do') #browser.visit('http://hh.ru/employer/vacancy.do') browser.find_by_xpath( '//*[starts-with(@href,"/employer/vacancy.do")]').first.click() time.sleep(3) try: browser.find_by_xpath( '//*[starts-with(@class,"newpopup__button")]').first.click() browser.find_by_xpath( '//*[starts-with(@class,"newpopup__closeable")]').first.click() except: pass v_name = u'Стажер-разработчик Python (OpenERP)' v_desc = u""" Обязанности: Программирование OpenERP Требования: Опыт работы с Python, Опыт работы с реляционными СУБД Условия: Удаленное обучение. Работа постоянная, удаленная, сдельная. Для стажера сумма вознаграждения по результатам собеседования.. Подробнее http://arterp.ru/vacancy-openerp-trainee/ """ browser.fill('name', v_name) browser.fill('areaName', sity) browser.choose('scheduleId', '3') browser.choose('employmentId', '1') browser.find_by_xpath( '//*[starts-with(@class,"b-forma-text")]').first.select("1") browser.find_by_id('HH-specializationChooser-checkbox_50').first.check() #Stage browser.find_by_id('HH-specializationChooser-checkbox_172').first.check() frame = browser.find_by_xpath( '//*[starts-with(@class,"jsxComponents-Editor-Frame")]')[0] print frame.value print frame['class']
def scrape__(): # set path for chrome broswer to open a blank chrome page executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path, headless=True) # set up empty dicts to append mars_data = {} # .___ ___. ___ .______ _______. .__ __. ___________ __ ____ _______. # | \/ | / \ | _ \ / | | \ | | | ____\ \ / \ / / / | # | \ / | / ^ \ | |_) | | (----` | \| | | |__ \ \/ \/ / | (----` # | |\/| | / /_\ \ | / \ \ | . ` | | __| \ / \ \ # | | | | / _____ \ | |\ \----.----) | | |\ | | |____ \ /\ / .----) | # |__| |__| /__/ \__\ | _| `._____|_______/ |__| \__| |_______| \__/ \__/ |_______/ # # use splinter and browser to connect to nasa website nasa_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest' browser.visit(nasa_url) time.sleep(2) # read the html html = browser.html soup = bs(html, 'html.parser') # search the most recent post for title and text news_title = soup.find("div", class_="content_title").text news_paragraph = soup.find("div", class_="article_teaser_body").text # add this data to mars_data dict mars_data["news_title"] = news_title mars_data["news_paragraph"] = news_paragraph # .___ .__. ___ .______ _______. __ .___ ___. ___ _______ _______ # | \/ | / \ | _ \ / | | | | \/ | / \ / _____|| ____| # | \ / | / ^ \ | |_) | | (----` | | | \ / | / ^ \ | | __ | |__ # | |\/| | / /_\ \ | / \ \ | | | |\/| | / /_\ \ | | |_ | | __| # | | | | / _____ \ | |\ \----.----) | | | | | | | / _____ \ | |__| | | |____ # |__| |__| /__/ \__\ | _| `._____|_______/ |__| |__| |__| /__/ \__\ \______| |_______| # connects to jpl.nasa url image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars" browser.visit(image_url) time.sleep(2) # clicks on the full image button browser.find_by_id('full_image').first.click() # opens all of the the html for the page as a big block, non prettify-able image_html = browser.html # reads through the html on the page, is prettify-able soup = bs(image_html, "html.parser") # find the specific tag and class for the image I am looking for # featured_image_url = image_url + soup.find("img", class_="fancybox-image")["src"] featured_image_url = image_url + soup.find( "a", class_="fancybox")["data-fancybox-href"] # add this data to mars_data dict mars_data["featured_image_url"] = featured_image_url # .___ ___. ___ .______ _______. ____ __ ____ _______ ___ .___________. __ __ _______ .______ # | \/ | / \ | _ \ / | \ \ / \ / / | ____| / \ | || | | | | ____|| _ \ # | \ / | / ^ \ | |_) | | (----` \ \/ \/ / | |__ / ^ \ `---| |----`| |__| | | |__ | |_) | # | |\/| | / /_\ \ | / \ \ \ / | __| / /_\ \ | | | __ | | __| | / # | | | | / _____ \ | |\ \----.----) | \ /\ / | |____ / _____ \ | | | | | | | |____ | |\ \----. # |__| |__| /__/ \__\ | _| `._____|_______/ \__/ \__/ |_______/__/ \__\ |__| |__| |__| |_______|| _| `._____| # set path to mars weather report Twitter page weather_url = "https://twitter.com/MarsWxReport?lang=en" browser.visit(weather_url) time.sleep(2) # read the html html = browser.html soup = bs(html, 'html.parser') # find the paragraph tab, mars_soup = soup.find_all("p", class_="TweetTextSize") weather_list = [] for weather in mars_soup: if re.search("Sol ", weather.text): weather_list.append(weather.text) # pull just the first weather report from the list mars_weather = weather_list[0] # add this data to mars_data dict mars_data["mars_weather"] = mars_weather # .___ ___. ___ .______ _______. _______ ___ ______ .___________. 
_______. # | \/ | / \ | _ \ / | | ____| / \ / || | / | # | \ / | / ^ \ | |_) | | (----` | |__ / ^ \ | ,----'`---| |----` | (----` # | |\/| | / /_\ \ | / \ \ | __| / /_\ \ | | | | \ \ # | | | | / _____ \ | |\ \----.----) | | | / _____ \ | `----. | | .----) | # |__| |__| /__/ \__\ | _| `._____|_______/ |__| /__/ \__\ \______| |__| |_______/ # set path to website url = "https://space-facts.com/mars/" time.sleep(2) # read the table at the url destination tables = pd.read_html(url) tables[0] # set up dataframe df = tables[0] df.columns = ["Categories", "Measurements"] df.set_index(["Categories"]) # convert html to df html_table = df.to_html() #replace all the \n with an empty space instead html_table.replace('\n', '') # save table as html # df.to_html("table.html") mars_data["html.table"] = "html_table" # __ __ _______ .___ ___. __ _______..______ __ __ _______ .______ _______ _______. # | | | | | ____|| \/ | | | / || _ \ | | | | | ____|| _ \ | ____| / | # | |__| | | |__ | \ / | | | | (----`| |_) | | |__| | | |__ | |_) | | |__ | (----` # | __ | | __| | |\/| | | | \ \ | ___/ | __ | | __| | / | __| \ \ # | | | | | |____ | | | | | | .----) | | | | | | | | |____ | |\ \----.| |____.----) | # |__| |__| |_______||__| |__| |__| |_______/ | _| |__| |__| |_______|| _| `._____||_______|_______/ # set up an empty list to append {title, img_url} dicts to hemispheres_list = [] # VALLES MARINERIS # set up chrome driver executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path, headless=True) # set up connection to url and click on link hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars" browser.visit(hemisphere_url) browser.click_link_by_partial_text("Schiaparelli Hemisphere Enhanced") # use BeautifulSoup to parse html data html = browser.html soup = bs(html, "html.parser") # set up link to html path valles_link = soup.find('div', 'downloads').a['href'] # set up dictionary with title and img_url valles_marineris = { "title": "Valles Marineris Hemisphere", "img_url": valles_link } # append dict to hemispheres list hemispheres_list.append(valles_marineris) # CERBERUS HEMISPHERE # set up chrome driver executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path, headless=True) # set up connection to url and click on link hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars" browser.visit(hemisphere_url) browser.click_link_by_partial_text("Cerberus Hemisphere Enhanced") # use BeautifulSoup to parse html data html = browser.html soup = bs(html, "html.parser") # set up link to html path cerberus_link = soup.find('div', 'downloads').a['href'] # set up dictionary with title and img_url cerberus = {"title": "Cerberus Hemisphere", "img_url": cerberus_link} # append dict to hemispheres list hemispheres_list.append(cerberus) #SCHIAPARELLI HEMISPHERE # set up chrome driver executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path, headless=True) # set up connection to url and click on link hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars" browser.visit(hemisphere_url) browser.click_link_by_partial_text("Cerberus Hemisphere Enhanced") # use BeautifulSoup to parse html data html = browser.html soup = bs(html, "html.parser") # set up link to html path schiaparelli_link = soup.find('div', 'downloads').a['href'] # set up dictionary with 
title and img_url schiaparelli = { "title": "Schiaparelli Hemisphere", "img_url": schiaparelli_link } # append dict to hemispheres list hemispheres_list.append(schiaparelli) # SYRTIS MAJOR HEMISPHERE # set up chrome driver executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path, headless=True) # set up connection to url and click on link hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars" browser.visit(hemisphere_url) browser.click_link_by_partial_text("Syrtis Major Hemisphere Enhanced") # use BeautifulSoup to parse html data html = browser.html soup = bs(html, "html.parser") # set up link to html path syrtis_link = soup.find('div', 'downloads').a['href'] # set up dictionary with title and img_url syrtis = {"title": "Syrtis Major Hemisphere", "img_url": syrtis_link} # append dict to hemispheres list hemispheres_list.append(syrtis) mars_data["hemispheres_list"] = hemispheres_list return mars_data
class BugTestCase(LiveServerTestCase): """ A set of tests to check the existence of bugs. """ def setUp(self): self.browser = Browser() signup(self, 'bob', '*****@*****.**', 'bob_secret') signin(self, 'bob', 'bob_secret') def tearDown(self): logout(self) self.browser.quit() Graph.objects.get(name="Bob's graph").destroy() def test_node_rel_count_one(self): ''' This test show that reflexive outgoing `relationships` don't count if there are more relationships. ''' real_nodes = 0 real_rels = 0 create_graph(self) create_schema(self) # Creating a nodetype: "First" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("First") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() # Creating another nodetype: "Second" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("Second") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() self.browser.find_by_id('dataMenu').first.click() # Creating an allowed relationship: "First -> First" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToFirst') self.browser.select('target', '1') self.browser.find_by_value('Save Type').first.click() # Creating an allowed relationship: "First -> Second" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToSecond') self.browser.select('target', '2') self.browser.find_by_value('Save Type').first.click() # Creating a node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating a node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second3") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # 
Editing the "First1" node self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption list']")[0].click() self.browser.find_by_xpath( "//td[@class='dataList']/a[@class='edit']").first.click() # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() # Adding the relationships self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First1" self.browser.find_by_value('Save First').first.click() # Checking the counts self.browser.find_link_by_href('/graphs/bobs-graph/').first.click() nodes = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-nodes']" ).first.value rels = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-relationships']" ).first.value self.assertEqual(str(real_nodes) + " nodes", nodes) self.assertEqual(str(real_rels) + " relationships", rels) def test_node_rel_count_two(self): ''' This test shows that new `nodes` with relationships don't count. 
''' real_nodes = 0 real_rels = 0 create_graph(self) create_schema(self) # Creating a nodetype: "First" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("First") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() # Creating another nodetype: "Second" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("Second") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() self.browser.find_by_id('dataMenu').first.click() # Creating an allowed relationship: "First -> First" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToFirst') self.browser.select('target', '1') self.browser.find_by_value('Save Type').first.click() # Creating an allowed relationship: "First -> Second" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToSecond') self.browser.select('target', '2') self.browser.find_by_value('Save Type').first.click() # Creating a node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating a node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second3") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Editing the "First1" node self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption list']")[0].click() self.browser.find_by_xpath( "//td[@class='dataList']/a[@class='edit']").first.click() # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() # Adding the relationships 
self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First1" self.browser.find_by_value('Save First').first.click() # Creating another node of the "First" type with relationships self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First3") # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() # Adding the relationships self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First3" self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Checking the counts self.browser.find_link_by_href('/graphs/bobs-graph/').first.click() nodes = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-nodes']" ).first.value rels = self.browser.find_by_xpath( 
"//div[@class='flags-block']/span[@class='graph-relationships']" ).first.value self.assertEqual(str(real_nodes) + " nodes", nodes) self.assertEqual(str(real_rels) + " relationships", rels) def test_node_rel_count_three(self): ''' This test show that reflexive outgoing `relationships` DO count if there are NO more relationships. ''' real_nodes = 0 real_rels = 0 create_graph(self) create_schema(self) # Creating a nodetype: "First" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("First") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() # Creating another nodetype: "Second" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("Second") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() self.browser.find_by_id('dataMenu').first.click() # Creating an allowed relationship: "First -> First" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToFirst') self.browser.select('target', '1') self.browser.find_by_value('Save Type').first.click() # Creating an allowed relationship: "First -> Second" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToSecond') self.browser.select('target', '2') self.browser.find_by_value('Save Type').first.click() # Creating a node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating a node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second3") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Editing the "First1" node self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption 
list']")[0].click() self.browser.find_by_xpath( "//td[@class='dataList']/a[@class='edit']").first.click() # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() # Adding the relationships self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First1" self.browser.find_by_value('Save First').first.click() # Creating another node of the "First" type with relationships self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First3") # Adding more "FirstToFirst" outgoing relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[0].click() # Adding the relationships self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First3" self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Checking the counts self.browser.find_link_by_href('/graphs/bobs-graph/').first.click() nodes = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-nodes']" ).first.value rels = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-relationships']" ).first.value self.assertEqual(str(real_nodes) + " nodes", nodes) self.assertEqual(str(real_rels) + " relationships", rels) def test_node_rel_count_four(self): ''' This test show that when there are reflexive incoming `relationships` only count those. 
''' real_nodes = 0 real_rels = 0 create_graph(self) create_schema(self) # Creating a nodetype: "First" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("First") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() # Creating another nodetype: "Second" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("Second") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() self.browser.find_by_id('dataMenu').first.click() # Creating an allowed relationship: "First -> First" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToFirst') self.browser.select('target', '1') self.browser.find_by_value('Save Type').first.click() # Creating an allowed relationship: "First -> Second" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToSecond') self.browser.select('target', '2') self.browser.find_by_value('Save Type').first.click() # Creating a node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating a node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second3") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Editing the "First1" node self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption list']")[0].click() self.browser.find_by_xpath( "//td[@class='dataList']/a[@class='edit']").first.click() # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() # Adding the relationships 
self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First1" self.browser.find_by_value('Save First').first.click() # Creating another node of the "First" type with relationships self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First3") # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[2].click() # Adding the relationships self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[4].fill('First1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[5].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() 
real_rels += 1 # Saving "First3" self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Checking the counts self.browser.find_link_by_href('/graphs/bobs-graph/').first.click() nodes = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-nodes']" ).first.value rels = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-relationships']" ).first.value self.assertEqual(str(real_nodes) + " nodes", nodes) self.assertEqual(str(real_rels) + " relationships", rels)
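# A minimal sketch, not part of the original test suite: the relationship
# steps above repeat the same token-input interaction (fill the autocomplete
# box, wait for the dropdown, click the highlighted suggestion). The helper
# name `add_relationship` is hypothetical; the XPath selectors and the
# `id_user_wait` wait are the ones the tests already use.
def add_relationship(browser, index, target_name):
    """Fill the token-input box at position `index` and pick the suggestion."""
    browser.find_by_xpath(
        "//li[@class='token-input-input-token']/input")[index].fill(target_name)
    # Give the autocomplete widget time to populate its dropdown
    browser.is_element_present_by_id("id_user_wait", 3)
    browser.find_by_xpath(
        "//div[@class='token-input-dropdown']"
        "//li[@class='token-input-dropdown-item2"
        " token-input-selected-dropdown-item']/b"
    ).first.click()


# Example usage inside a test, assuming the edit form is already open:
#     add_relationship(self.browser, 0, 'First2')
#     real_rels += 1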
print("....the latest NASA news.....") print("Title: " + title) print("Title Paragraph: " + title_para) # # JPL Mars Space Images - Featured Image # # In[8]: # openning the url on chrome url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars" browser.visit(url) # In[9]: full_image = browser.find_by_id('full_image') full_image.click() # In[10]: browser.is_element_present_by_text('more info', wait_time=1) more_info = browser.find_link_by_partial_text('more info') more_info.click() # In[11]: html = browser.html image_soup = bs(html, 'html.parser') # In[12]:
def open_door():
    start_time = time.time()
    # `options` (username, password, door ids) and `parse_door_ids()` are
    # assumed to be defined elsewhere in the original script.
    browser = Browser('phantomjs')
    browser.visit("https://www.chalmersstudentbostader.se/login")
    browser.fill('log', options.username)
    browser.fill('pwd', options.password)
    # Wait for the "Logga in" ("Log in") button to render, then click it
    while browser.find_by_text('Logga in') == []:
        sleep(0.05)
    btn = browser.find_by_text('Logga in')[1]
    btn.click()
    # Poll until the "Öppna porten" ("Open the gate") link appears and its
    # href no longer contains the unresolved "{aptusUrl}" placeholder
    while True:
        while browser.find_by_text('Öppna porten') == []:
            sleep(0.05)
        port = browser.find_by_text('Öppna porten').first
        if not "{aptusUrl}" in port['href']:
            break
        sleep(0.1)
    browser.visit(port['href'])
    door_ids = parse_door_ids(options.door_ids)
    for door_id in door_ids:
        # Wait for each door button to appear, then click it
        while browser.find_by_id(door_id) == []:
            sleep(0.1)
        print("Opening door with id: " + door_id)
        btn = browser.find_by_id(door_id).first
        btn.click()
    print(time.time() - start_time)
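# A minimal sketch, not part of the original script: open_door() busy-waits in
# several places until an element shows up. The helper below factors that
# polling pattern out; the name `wait_for_elements` and the timeout value are
# assumptions for illustration only.
import time


def wait_for_elements(find, interval=0.05, timeout=30):
    """Poll `find()` until it returns a non-empty splinter element list.

    `find` is a zero-argument callable, e.g.
    `lambda: browser.find_by_text('Logga in')`.
    Raises RuntimeError if nothing shows up within `timeout` seconds.
    """
    deadline = time.time() + timeout
    while True:
        elements = find()
        if elements != []:
            return elements
        if time.time() > deadline:
            raise RuntimeError("Timed out waiting for element")
        time.sleep(interval)


# Example usage with the login button from open_door():
#     btn = wait_for_elements(lambda: browser.find_by_text('Logga in'))[1]
#     btn.click()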