class UserUtils(object):

    def __init__(self):
        self.config = config.read_config()
        self.account = self.config['account']
        self.idp_server = self.config['nodes']['idp_node']

        # Abort test if esgf-web-fe is not reachable
        r = requests.get("https://{0}/esgf-web-fe".format(self.idp_server), verify=False, timeout=1)
        assert r.status_code == 200

        self.browser = Browser('firefox')

        # Mapping user data to fit the web-fe user creation form
        self.elements = {'firstName': self.account['firstname'],
                         'lastName': self.account['lastname'],
                         'email': self.account['email'],
                         'userName': self.account['username'],
                         'password1': self.account['password'],
                         'password2': self.account['password']}

    def check_user_exists(self):
        URL = "https://{0}/esgf-web-fe/login".format(self.idp_server)
        OpenID = "https://{0}/esgf-idp/openid/{1}".format(self.idp_server, self.account['username'])

        # Try to log in
        self.browser.visit(URL)
        self.browser.find_by_id('openid_identifier').fill(OpenID)
        self.browser.find_by_value('Login').click()

        # User does not exist if unable to resolve OpenID
        if self.browser.is_text_present("Error: unable to resolve OpenID identifier"):
            self.user_exists = False
        else:
            self.user_exists = True

    def create_user(self):
        URL = "https://{0}/esgf-web-fe/createAccount".format(self.idp_server)
        self.browser.visit(URL)

        # Filling the form
        for element_name in self.elements:
            self.browser.find_by_name(element_name).fill(self.elements[element_name])
        self.browser.find_by_value('Submit').click()

        # Parsing the response
        self.response = []
        if self.browser.is_text_present("SUCCESS"):
            self.response.append("SUCCESS")
        else:
            self.response.append("FAILURE")
            selection = self.browser.find_by_tag('span')
            for sel in selection:
                if sel.has_class('myerror'):
                    self.response.append(sel.value)

    def exit_browser(self):
        self.browser.quit()
def checkin(confirmation, first, last, email, run_time):
    """Check in to a flight.

    Arguments:
    confirmation -- your confirmation number
    first -- your first name
    last -- your last name
    email -- email address to send confirmation
    run_time -- the time you need to check in
    """
    browser = Browser('zope.testbrowser', ignore_robots=True)
    browser.visit('https://www.southwest.com/flight/retrieveCheckinDoc.html')
    browser.fill('confirmationNumber', confirmation)
    browser.fill('firstName', first)
    browser.fill('lastName', last)

    # Sleep until the requested check-in time
    delta = run_time - datetime.now()
    time.sleep(delta.total_seconds())

    submit = browser.find_by_name('submitButton')
    submit.click()
    if browser.find_by_id('errors_props_wrapper') != []:
        browser.quit()
        raise RuntimeError("Info is incorrect or you checked in too early")

    check_in = browser.find_by_name('printDocuments')
    check_in.click()
    # Use a distinct name for the email option element so the `email`
    # argument is not shadowed when filling in the address below.
    email_option = browser.find_by_id('optionEmail')
    email_option.click()
    browser.fill('emailAddress', email)
    cont = browser.find_by_name('book_now')
    cont.click()
    browser.quit()
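# Hedged usage sketch (not from the original source): the confirmation number,
# names, and email below are placeholders, and the function is assumed to be
# importable alongside `datetime`/`timedelta`.
if __name__ == '__main__':
    from datetime import datetime, timedelta
    checkin('ABC123', 'Jane', 'Doe', 'jane@example.com',
            run_time=datetime.now() + timedelta(seconds=5))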
def court_booking_login(user, passwd):
    """
    Create a new browser instance and login to the website
    """
    browser = Browser()
    browser.visit("https://courtbooking.bayclubs.com")
    if browser.status_code != 200:
        logging.error("court_booking_login: Unable to open court booking "
                      "website")
        browser.quit()
        return None

    input_email = browser.find_by_id("InputEmail1")
    input_email.fill(user)
    input_passwd = browser.find_by_id("InputPassword1")
    input_passwd.fill(passwd)
    login_button = browser.find_by_id("loginButton")
    login_button.click()

    if browser.status_code != 200:
        logging.error("court_booking_login: Error unable to login into court "
                      "booking website")
        browser.quit()
        return None

    if browser.is_element_present_by_id("loginresult", wait_time=5):
        logging.error("court_booking_login: Incorrect login credentials")
        browser.quit()
        return None

    return browser
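# Hedged usage sketch (not part of the original source): the credentials are
# placeholders, and the caller is responsible for quitting the returned browser.
if __name__ == '__main__':
    session = court_booking_login('user@example.com', 'secret')
    if session is not None:
        print(session.title)
        session.quit()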
def enable():
    import time
    import requests
    import settings
    from splinter import Browser
    from xvfbwrapper import Xvfb

    print "Trying to enable myself."
    vdisplay = Xvfb()
    vdisplay.start()

    email = settings.getEmail()
    password = settings.getPassword()
    team_name = settings.getTeamName()
    bot_user = settings.getBotUser()

    browser = Browser('chrome')
    url = 'https://{}.slack.com/services/{}'.format(team_name, bot_user)
    browser.visit(url)
    browser.fill('email', email)
    browser.fill('password', password)
    browser.find_by_id('signin_btn').first.click()
    browser.find_link_by_text('Enable').first.click()
    time.sleep(2)  # Sometimes I saw a crash where there was no alert, so we'll wait a bit first.
    alert = browser.get_alert()
    alert.accept()
    time.sleep(2)  # If you close the display too quickly, the request doesn't get processed.
    vdisplay.stop()
def google_login(user_name, password, code):
    browser = Browser('firefox')
    url = 'https://accounts.google.com/ServiceLogin'
    browser.visit(url)
    browser.find_by_id('Email').fill(user_name)
    browser.find_by_id('next').click()
    browser.find_by_id('Passwd').fill(password)
    browser.find_by_id('signIn').click()

    url1 = 'https://play.google.com/store?hl=jp'
    browser.visit(url1)
    browser.find_by_css('button.id-no-menu-change').click()
    time.sleep(1)
    browser.find_by_css('input.redeem-input-text-box').fill(code)
    browser.find_by_id('id-redeem-ok-button').click()
    time.sleep(2)
    result = browser.find_by_css('div.redeem-invalid-code-msg').value
    browser.quit()
    return result
class LoginTestCase(unittest.TestCase):

    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        self.testbed.init_datastore_v3_stub()
        self.testbed.init_memcache_stub()
        self.browser = Browser('chrome')

    def tearDown(self):
        self.testbed.deactivate()

    def test_login(self):
        self.browser.visit("http://127.0.0.1:8080/")
        self.assertEqual(self.browser.find_by_tag("h3").first.text, "Not logged in")
        self.browser.find_by_id("submit-login").first.click()
        self.assertEqual(self.browser.find_link_by_text("Insurance").first.text, "Insurance")

    def test_logout(self):
        self.browser.visit("http://127.0.0.1:8080/")
        self.assertEqual(self.browser.find_by_tag("h3").first.text, "Not logged in")
        self.browser.find_by_id("submit-login").first.click()
        self.assertEqual(self.browser.find_link_by_text("Insurance").first.text, "Insurance")
        self.browser.find_link_by_text("Log out").first.click()
        self.assertEqual(self.browser.find_by_tag("h3").first.text, "Not logged in")
class UserUtils(object):

    def __init__(self):
        self.config = config.read_config()
        self.account = self.config['account']
        self.idp_server = self.config['nodes']['idp_node']

        # Abort test if esgf-web-fe is not reachable
        r = requests.get("https://{0}/user/add".format(self.idp_server), verify=False, timeout=1)
        assert r.status_code == 200

        self.browser = Browser('firefox')

        # Mapping user data to fit the web-fe user creation form
        self.elements = {'first_name': self.account['firstname'],
                         'last_name': self.account['lastname'],
                         'email': self.account['email'],
                         'username': self.account['username'],
                         'password': self.account['password'],
                         'confirm_password': self.account['password'],
                         'institution': self.account['institution'],
                         'city': self.account['city'],
                         'country': self.account['country']}

    def check_user_exists(self):
        URL = "https://{0}/login".format(self.idp_server)
        OpenID = "https://{0}/esgf-idp/openid/{1}".format(self.idp_server, self.account['username'])

        # Try to log in
        self.browser.visit(URL)
        self.browser.find_by_id('openid_identifier').fill(OpenID)
        self.browser.find_by_value('Login').click()

        # User does not exist if unable to resolve OpenID
        if self.browser.is_text_present("OpenID Discovery Error: unrecognized by the Identity Provider."):
            self.user_exists = False
        else:
            self.user_exists = True

    def create_user(self):
        URL = "https://{0}/user/add".format(self.idp_server)
        self.browser.visit(URL)

        # Filling the form
        for element_name in self.elements:
            self.browser.find_by_name(element_name).fill(self.elements[element_name])
        self.browser.find_by_value('Submit').click()

        # Parsing the response
        self.response = []
        if self.browser.is_text_present("Thank you for creating an account. You can now login."):
            self.response.append("SUCCESS")
        else:
            self.response.append("FAILURE")

    def exit_browser(self):
        self.browser.quit()
def browse(url):
    browser = Browser("phantomjs", service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    browser.visit(url)
    browser.find_by_id('RadioW03').first.click()
    browser.find_by_id('RadioS03').first.click()
    browser.find_by_text('\n Search\n ').first.click()
    return browser
def __scrape(self, landing_page):
    browser = Browser('chrome',
                      executable_path='C:\Python27\Lib\site-packages\chromedriver_win32\chromedriver.exe',
                      service_args=PROXIES)
    # browser = Browser('phantomjs', service_args=PROXIES, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0')
    with browser:
        template1 = True
        browser.visit(landing_page)
        time.sleep(2)
        nav = [x for x in browser.find_by_css('a.nav')
               if (x.text == 'Jobs by Location' or x.text == 'By Location')]
        if len(nav) > 0:
            nav[0].click()
        else:
            template1 = False
        link = browser.url
        state_index = 1
        city_index = 1
        while True:
            browser.visit(link)
            if not template1:
                nav = browser.find_by_css('#tabHeader')
                nav = nav.find_by_css('a')
                nav[1].click()
            states = browser.find_by_name('search.stateList.value')
            state_list = states.find_by_tag('option')
            print state_list[state_index].text
            state_list[state_index].click()
            if state_list[state_index].text != 'choose one...':
                element = 'cityList_' + state_list[state_index].text
                cities = browser.find_by_id(element)
                city_list = cities.find_by_tag('option')
                city_list[city_index].click()
                if city_list[city_index].text != 'choose one...':
                    print city_list[city_index].text, state_list[state_index].text
                    browser.find_by_id('cityStateSearch').click()
                    # Default to an empty list so len() below is safe even if the lookup fails
                    links = []
                    try:
                        links = browser.find_by_css('a.withBubble')
                    except:
                        pass
                    if len(links) > 0:
                        for i in links:
                            b = Browser('chrome',
                                        executable_path='C:\Python27\Lib\site-packages\chromedriver_win32\chromedriver.exe',
                                        service_args=PROXIES)
                            # b = Browser('phantomjs', service_args=PROXIES, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0')
                            with b:
                                b.visit(i['href'])
                                self.__navigate_pages(b)
                    else:
                        self.__navigate_pages(browser)
                city_index += 1
                if city_index == len(city_list):
                    city_index = 0
                    state_index += 1
                    if state_index == len(state_list):
                        break
            else:
                state_index += 1
def get_claim_code_from_server():
    browser = Browser('phantomjs', service_args=['--ignore-ssl-errors=true'])
    browser.visit(ROOT_ADDRESS + "/merchant-login")
    browser.fill_form({"email": USER_NAME, "password": PASSWORD})
    browser.find_by_id("loginButton")[0].click()
    time.sleep(5)
    browser.visit(ROOT_ADDRESS + "/api-tokens")
    browser.find_by_css(".token-access-new-button").find_by_css(".btn").find_by_css(".icon-plus")[0].click()
    browser.find_by_id("token-new-form").find_by_css(".btn")[0].click()
    return browser.find_by_css(".token-claimcode")[0].html
class TrackListScraper(object):

    def __init__(self, artists, year):
        self.browser = Browser('chrome')
        self.artists = artists
        self.year = year
        self.browser.visit('http://1001tracklists.com')

    def execute_full_scrape(self):
        artist_tracklists = {}
        for artist in self.artists:
            artist_tracklists[artist] = self.scrape_per_artist(artist)
        self.browser.quit()
        return artist_tracklists

    def scrape_per_artist(self, artist):
        """Execute the same scrape but instead using the python splinter library"""
        self.browser.fill('main_search', artist + ' edc ' + self.year)
        self.browser.find_by_id('btn_search').first.click()
        try:
            self.browser.click_link_by_partial_text('2014-06-')
            track_strings = self.get_track_list_for_set(artist)
            return track_strings
        except ElementDoesNotExist:
            pass

    def get_track_list_for_set(self, artist):
        soup = BeautifulSoup(self.browser.html)
        track_values = soup.find_all('div', class_='trackValue')
        track_strings = []
        file = open('tracklist-' + artist + '-edc' + self.year, 'w')
        for track in track_values:
            if track.a:
                track_string = track.a.string
                file.write(track_string)
                # track details in format [artist, trackname]
                track_details = self.parse_track_string(track_string)
                track_strings.append(track_details)
        file.close()
        return track_strings

    def parse_track_string(self, track_string):
        track_info = track_string.strip().split('-')
        for i in range(len(track_info)):
            track_info[i] = track_info[i].strip()
        return track_info
def test_0_http_browser_download(self):
    path = self.get_endpoint_path('HTTPServer')
    url = "http://{0}/thredds/fileServer/{1}".format(self.data_node, path)
    OpenID = "https://{0}/esgf-idp/openid/{1}".format(self.idp_node, self.username)
    pf = {'browser.helperApps.neverAsk.saveToDisk': 'application/x-netcdf, application/netcdf'}
    browser = Browser('firefox', profile_preferences=pf)
    browser.visit(url)
    if browser.status_code.is_success() is True:
        browser.quit()
        return
    browser.find_by_id('openid_identifier').fill(OpenID)
    browser.find_by_value('GO').click()
    browser.find_by_id('password').fill(self.password)
    browser.find_by_value('SUBMIT').click()
    # To do only if user is not enrolled in a group
    if browser.is_text_present('Group Registration Request'):
        # Choosing first registration group
        browser.find_by_id('button_1').click()
        # Accepting license agreement
        browser.execute_script('myForm.submit();')
        # Clicking on 'Download data' button
        browser.find_by_id('goButton').click()
    browser.quit()
def twitter_login(username, password):
    """Log in to Twitter and return the browser object"""
    browser = Browser()

    # Login
    browser.visit("https://www.twitter.com/")
    browser.find_by_id("signin-email").first.value = username
    browser.find_by_id("signin-password").first.value = password
    browser.find_by_css(".js-submit").first.click()

    return browser
def passwd(self):
    if len(self.login) < 1 or len(self.panic) < 1 or len(self.user) < 1:
        return False
    b = Browser()
    b.driver.set_window_size(900, 900)
    try:
        b.visit("https://accounts.google.com/ServiceLogin?service=accountsettings")
        b.fill('Email', self.user)
        btn = b.find_by_id("next")
        btn.click()
        b.fill('Passwd', self.login)
        btn = poll_for("#signIn", b)
        btn.click()
        b.visit("https://myaccount.google.com/security#signin")
        btn = b.find_by_css(".vkq40d").first
        if btn is not None:
            print "not none"
            btn.click()
            poll_fill('Email', self.user, b)
            btn = b.find_by_id("next")
            btn.click()
            poll_fill('Passwd', self.login, b)
            btn = b.find_by_id("signIn")
            btn.click()
            time.sleep(INV)
        btn = poll_for(".TCRTM", b)
        btn.click()
        poll_fill('Passwd', self.login, b)
        btn = b.find_by_id("signIn")
        btn.click()
        p = poll_for(".Hj", b)
        p.fill(self.panic)
        p = b.find_by_css(".Hj")[1]
        p.fill(self.panic)
        btn = b.find_by_css(".Ya")
        btn.click()
        time.sleep(INV * 5)
        b.quit()
    except:
        traceback.print_exc(file=sys.stdout)
        raw_input("Something went wrong...")
        b.quit()
        if self.attempts < self.max_attempts:
            self.attempts += 1
            self.passwd()
def check_in(self, conf_number, first_name, last_name):
    browser = Browser('phantomjs')
    browser.visit('https://www.southwest.com/')
    checkin_form_button = browser.find_by_id('booking-form--check-in-tab')[0]
    checkin_form_button.click()
    browser.fill('confirmationNumber', conf_number)
    browser.fill('firstName', first_name)
    browser.fill('lastName', last_name)
    checkin_button = browser.find_by_id('jb-button-check-in')[0]
    checkin_button.click()
    submit_button = browser.find_by_id('submitButton')[0]
    submit_button.click()
def passwd(self):
    b = Browser()
    b.driver.set_window_size(900, 900)
    b.visit("https://twitter.com")
    btn = b.find_by_css(".js-login")
    btn.click()
    b.find_by_id("signin-email").fill(self.user)
    b.find_by_id("signin-password").fill(self.login)
    btn = b.find_by_value("Log in")
    btn.click()
    b.visit("https://twitter.com/settings/password")
    b.fill("current_password", self.login)
    b.fill("user_password", self.panic)
    b.fill("user_password_confirmation", self.panic)
    btn = b.find_by_text("Save changes")
    btn.click()
    b.quit()
def splinter(url):
    # """"""""""""""""""""""""" MySQL setup """"""""""""""""""""""""""
    conn = MySQLdb.connect(host='192.168.1.8', user='******', passwd='123123', db='gwycf')
    cursor = conn.cursor()  # create cursor to operate the db
    # """"""""""""""""""""""""" MySQL setup """"""""""""""""""""""""""

    data = xlrd.open_workbook('./chafen.xlsx')
    table = data.sheets()[0]
    nrows = table.nrows
    ncols = table.ncols
    print nrows

    browser = Browser('firefox')
    # browser = Browser('chrome')
    dir(browser)
    browser.visit(url)
    time.sleep(5)
    count = 0
    # <================================================>
    for i in range(nrows):
        # HaoMa = str(table.row_values(i)[1]).split(".")[0]
        name = table.row_values(i)[0]
        HaoMa = table.row_values(i)[1]
        # epost = table.row_values(i)[2]
        browser.find_by_name('TxtName').fill(name)
        browser.find_by_name('TxtHaoMa').fill(HaoMa)
        browser.find_by_id('btnSubmit').click()
        # ================= scrape fields from the result page =================
        epost = browser.find_by_tag('td')[10].value
        ecode = browser.find_by_tag('td')[14].value
        xingce = browser.find_by_tag('td')[16].value
        shenlun = browser.find_by_tag('td')[18].value
        jiafen = browser.find_by_tag('td')[20].value
        zongfen = browser.find_by_tag('td')[22].value
        # ================= scrape fields from the result page =================
        query = u"insert into info values('%s','%s','%s','%s','%s','%s','%s','%s',0)" % (name, HaoMa, epost, ecode, xingce, shenlun, jiafen, zongfen)
        print count, query
        cursor.execute(query.encode('utf-8'))  # the raw data ran fine as gbk; switched to utf-8 now
        conn.commit()
        browser.back()
        count = count + 1

    cursor.close()
    conn.commit()
    conn.close()
def getAuth():
    import requests
    import settings
    from time import sleep
    from splinter import Browser
    from xvfbwrapper import Xvfb

    vdisplay = Xvfb(width=3840, height=2160)
    vdisplay.start()

    client_id = settings.getClientID()
    email = settings.getEmail()
    password = settings.getPassword()
    team_name = settings.getTeamName()

    scopes = ('users:read', 'channels:write', 'channels:history', 'channels:read', 'chat:write:bot',
              'chat:write:bot', 'chat:write:bot', 'dnd:write', 'dnd:read', 'emoji:read',
              'files:write:user', 'files:read', 'groups:write', 'groups:history', 'groups:read',
              'im:write', 'im:history', 'im:read', 'mpim:write', 'mpim:history', 'mpim:read',
              'pins:write', 'pins:read', 'reactions:write', 'reactions:read', 'reminders:write',
              'reminders:read', 'search:read', 'stars:write', 'stars:read', 'team:read',
              'usergroups:write', 'usergroups:read', 'users:write', 'admin')
    scopeStr = '+'.join(scopes)

    browser = Browser('chrome')
    url = 'https://slack.com/oauth/authorize?client_id={}&scope={}&state=test'.format(client_id, scopeStr)
    browser.visit(url)
    browser.fill('domain', team_name)
    browser.find_by_id('submit_team_domain').first.click()
    browser.fill('email', email)
    browser.fill('password', password)
    browser.find_by_id('signin_btn').first.click()
    button = browser.find_by_id('oauth_authorizify')
    button.mouse_over()
    button.click()
    codeStart = browser.url.find('=') + 1
    codeEnd = browser.url.find('&')
    code = browser.url[codeStart:codeEnd]
    browser.quit()
    vdisplay.stop()
    return code
def main(argv):
    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:")
    except getopt.GetoptError as err:
        sys.exit(2)
    global test_config
    config_file_name = None
    for o, a in opts:
        if o == "-f":
            config_file_name = a
        else:
            assert False, "unhandled option"
    if config_file_name is None:
        usage(argv)
        sys.exit(1)
    config_file = open(config_file_name, "r")
    test_config = json.load(config_file)

    browser = Browser()
    browser.visit(test_config['url'])

    # log in
    browser.find_by_id('id_username').fill(test_config['username'])
    browser.find_by_id('id_password').fill(test_config['password'])
    browser.find_by_id('dijit_form_Button_0_label').click()

    l = browser.find_by_xpath("//div[@id='treeNode_reboot']/div")
    time.sleep(2)
    l.first.click()

    # The button labelled Cancel in the code is actually the button which does the Reboot!
    l = browser.find_by_id("btn_Reboot_Cancel")
    time.sleep(2)
    l.first.click()
    time.sleep(5)
    browser.quit()
def get_login(url):
    browser = Browser("phantomjs", service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    browser.visit(url)
    browser.find_by_id('username').fill('*****@*****.**')  # MORPH
    #browser.find_by_id('username').fill('*****@*****.**')
    #browser.find_by_id('username').fill('*****@*****.**')
    #browser.find_by_id('username').fill('*****@*****.**')  # Morph uk
    browser.find_by_id('password').fill('Nrjn1gsa')
    browser.find_by_name('submit').first.click()
    time.sleep(1)
    print browser.url
    browser.click_link_by_href("/business/opportunitySearchForm.html")
    time.sleep(1)
    browser.select('status', "")
    browser.find_by_value("Search").first.click()
    time.sleep(2)
    print browser.url
    return browser
def run(self):
    browser = Browser('chrome',
                      executable_path="C:\Python27\Lib\site-packages\chromedriver_win32\chromedriver.exe",
                      service_args=PROXIES)
    # browser = Browser('phantomjs', service_args=PROXIES, user_agent="Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0")
    with browser:
        page = 1
        browser.visit(self.url)
        browser.fill("p", self.keyword)
        browser.find_by_id("search-submit").click()
        while True:
            time.sleep(10)
            logging.info("Page " + str(page))
            for link in browser.find_by_css("div.res"):
                if "applicationname" in link.find_by_css("a").first["href"].lower():
                    self.queue.put(link.find_by_css("a").first["href"])
            page += 1
            if browser.find_by_css("#pg-next"):
                browser.find_by_css("#pg-next").click()
            else:
                break
        self.queue.put(None)
def main(argv):
    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:")
    except getopt.GetoptError as err:
        sys.exit(2)
    global test_config
    config_file_name = None
    for o, a in opts:
        if o == "-f":
            config_file_name = a
        else:
            assert False, "unhandled option"
    if config_file_name is None:
        usage(argv)
        sys.exit(1)
    config_file = open(config_file_name, "r")
    test_config = json.load(config_file)

    browser = Browser()
    browser.visit(test_config['url'])
    browser.find_by_id('id_password').fill(test_config['password'])
    browser.find_by_id('id_confirm_password').fill(test_config['password'])
    browser.find_by_id('dijit_form_Button_0_label').click()
    browser.quit()
def makeChouseisan(listOfDays, weekdayTime, weekendTime, autosubmit=True):
    # TODO: add Title, Comment as arguments
    browser = Browser('chrome')
    browser.visit("https://chouseisan.com/schedule/newEvent/create")
    browser.find_by_id("name").fill("Title")
    browser.find_by_id("comment").fill("Comment")
    browser.find_by_id("kouho").fill("\n".join([formatDay(day, weekdayTime, weekendTime) for day in listOfDays]))
    if autosubmit:
        browser.find_by_id("createBtn").first.click()
    while len(browser.windows) > 0:
        time.sleep(10)
def test_0_http_browser_download(self):
    path = self.get_endpoint_path("HTTPServer")
    url = "http://{0}/thredds/fileServer/{1}".format(self.data_node, path)
    OpenID = "https://{0}/esgf-idp/openid/{1}".format(self.idp_node, self.username)
    pf = {"browser.helperApps.neverAsk.saveToDisk": "application/x-netcdf, application/netcdf"}
    browser = Browser("firefox", profile_preferences=pf)
    browser.visit(url)
    if browser.status_code.is_success() is True:
        browser.quit()
        return
    browser.find_by_css("input.custom-combobox-input").fill(OpenID)
    browser.find_by_value("GO").click()
    browser.find_by_id("password").fill(self.password)
    browser.find_by_value("SUBMIT").click()
    # To do only if user is not enrolled in a group
    if browser.is_text_present("Group Registration Request"):
        # Choosing first registration group
        browser.find_by_id("button_1").click()
        # Accepting license agreement
        browser.execute_script("myForm.submit();")
        # Clicking on 'Download data' button
        browser.find_by_id("goButton").click()
    browser.quit()
def main(argv):
    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:")
    except getopt.GetoptError as err:
        sys.exit(2)
    global test_config
    config_file_name = None
    for o, a in opts:
        if o == "-f":
            config_file_name = a
        else:
            assert False, "unhandled option"
    if config_file_name is None:
        usage(argv)
        sys.exit(1)
    config_file = open(config_file_name, "r")
    test_config = json.load(config_file)

    browser = Browser()
    browser.visit(test_config['url'])

    e = browser.find_by_xpath("//div[@id='treeNode_account']/div/span")
    e.click()
    e = browser.find_by_xpath("//div[@id='treeNode_account.AdminAccount']/div/span")
    e.click()
    e = browser.find_by_xpath("//div[@id='treeNode_account.AdminAccount.ChangePass']/div/span[3]/span[2]")
    time.sleep(1)
    e.click()
    browser.find_by_id('id_new_password').fill(test_config['password'])
    browser.find_by_id('id_new_password2').fill(test_config['password'])
    browser.find_by_id('btn_PasswordChangeForm_Ok_label').click()
def get_login(url):
    browser = Browser("phantomjs", service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    browser.visit(url)
    #browser.find_by_id('username').fill('*****@*****.**')  # MORPH
    #browser.find_by_id('username').fill('*****@*****.**')
    #browser.find_by_id('username').fill('*****@*****.**')
    browser.find_by_id('username').fill('*****@*****.**')  # MORPH UK
    browser.find_by_id('password').fill('Nrjn1gsa')
    browser.find_by_name('submit').first.click()
    time.sleep(1)
    print browser.url
    try:
        browser.click_link_by_href("/business/opportunitySearchForm.html")
        time.sleep(1)
        browser.click_link_by_href("opportunityAdvancedSearchForm.html")
        time.sleep(2)
        #browser.find_by_value('All').first.click()
        browser.select('status', "")
        browser.select('area', "9")  # 'area' is 'class name' not just name?
        time.sleep(3)
        print browser.find_by_value('Add All')  # TODO
        print browser.html
        browser.find_by_value('Add All').first.click()
        print 'added all England only'  # TODO
        time.sleep(2)
        browser.find_by_value("Search").first.click()
        time.sleep(2)
    except Exception as e:
        print 'error: ', e
        browser.click_link_by_href("/business/logoutHosts.html")
        time.sleep(4)
        browser.quit()
        sys.exit("login failed")
    print browser.url
    return browser
def _browser(self, qry, locale=None, country=None):
    # location
    browser = Browser('phantomjs')
    if country == "Canada":
        browser.visit('http://jobsearch.monster.ca/')
    else:
        browser.visit('http://jobsearch.monster.com/')
    time.sleep(1)
    #qry = "inside sales"
    #browser.find_by_css('.b1 > input').first.fill('"{0}"'.format(qry))
    if qry[0] == '"' and qry[-1] == '"':
        browser.find_by_css('.b1 > input').first.fill('{0}'.format(qry))
    elif qry[0] == "'" and qry[-1] == "'":
        browser.find_by_css('.b1 > input').first.fill('{0}'.format(qry))
    else:
        browser.find_by_css('.b1 > input').first.fill('"{0}"'.format(qry))
    browser.find_by_css('.b1 > input').first.fill('{0}'.format(locale))
    browser.find_by_css('.searchButton').first.click()
    time.sleep(1)
    browser.find_by_css('#sort-by').first.click()
    browser.find_by_id('sort-by-dt.rv.di').first.click()
    return browser
def passwd(self):
    if len(self.login) < 1 or len(self.panic) < 1 or len(self.user) < 1:
        return False
    b = Browser()
    b.visit("https://accounts.google.com")
    b.fill("Email", self.user)
    btn = b.find_by_id("next")
    btn.click()
    b.fill("Passwd", self.login)
    btn = b.find_by_id("signIn")
    btn.click()
    b.visit("https://myaccount.google.com/security/signinoptions/password")
    b.fill("Passwd", self.login)
    btn = b.find_by_id("signIn")
    btn.click()
    p = b.find_by_css(".Hj").first
    p.fill(self.panic)
    p = b.find_by_css(".Hj")[1]
    p.fill(self.panic)
    btn = b.find_by_css(".Ya")
    btn.click()
    b.quit()
def splinter(url):
    browser = Browser()
    browser.visit(url)
    time.sleep(5)
    browser.find_by_id(userNameID).fill('')
    browser.find_by_id(pwdID).fill('')
    browser.find_by_id(loginBtnID).click()
    time.sleep(8)
    browser.quit()
def scrape():
    TEST = False
    scraped_data = {}

    # Set up browser with chromedriver executable
    executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
    browser = Browser("chrome", **executable_path, headless=False)

    # Visit first scraping target [NASA Mars News] and set up parser
    news_url = "https://mars.nasa.gov/news/"
    browser.visit(news_url)
    browser.find_by_css(".item_list").first.find_by_tag("a").click()
    news_html = browser.html
    soup = BeautifulSoup(news_html, 'html.parser')

    # Collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later
    news_title = soup.find("h1", {"class": "article_title"}).get_text().strip()
    news_paragraph = soup.find("div", {"class": "wysiwyg_content"}).p.get_text()

    # Test results
    if TEST:
        print(news_title)
        print(news_paragraph)

    # Store results in dict
    scraped_data["news_title"] = news_title
    scraped_data["news_paragraph"] = news_paragraph

    # Visit second scraping target [JPL Mars Space Images - Featured Image]
    images_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(images_url)
    browser.find_by_id("full_image").click()
    # Button may not load immediately causing an error, loop until it appears
    while browser.is_element_not_present_by_text("more info ", wait_time=None):
        pass
    browser.find_by_text("more info ").click()

    # Select full size image in order to obtain url
    browser.find_by_css(".main_image").click()
    featured_image_url = browser.url

    # Test results
    if TEST:
        print(featured_image_url)

    # Store results in dict
    scraped_data["feat_image_url"] = featured_image_url

    # Visit third scraping target [Mars Weather]
    weather_url = "https://twitter.com/marswxreport?lang=en"
    browser.visit(weather_url)

    # Set up parser
    weather_html = browser.html
    soup = BeautifulSoup(weather_html, 'html.parser')

    # Remove child <a> in order to exclude twitter url
    soup.find("p", {"class": "TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"}).a.extract()

    # Get weather tweet
    mars_weather = soup.find("p", {"class": "TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"}).get_text()

    # Test results
    if TEST:
        print(mars_weather)

    # Store results in dict
    scraped_data["weather_tweet"] = mars_weather

    # Visit fourth scraping target [Mars Facts]
    facts_url = "https://space-facts.com/mars/"

    # Parse table with pandas.read_html and export table to a html string
    facts_df = pd.read_html(facts_url, attrs={"id": "tablepress-mars"})[0]
    facts_html = facts_df.to_html(index=False)

    # Test results
    if TEST:
        print(facts_html)

    # Store results in dict
    scraped_data["facts_html_table"] = facts_html

    # Visit fifth scraping target [Mars Hemispheres]
    hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    link_texts = ["Cerberus Hemisphere Enhanced", "Schiaparelli Hemisphere Enhanced",
                  "Syrtis Major Hemisphere Enhanced", "Valles Marineris Hemisphere Enhanced"]
    hemisphere_image_urls = []
    hemisphere_titles = ["Cerberus Hemisphere", "Schiaparelli Hemisphere",
                         "Syrtis Major Hemisphere", "Valles Marineris Hemisphere"]

    for i, link_text in enumerate(link_texts):
        # Visit each hemisphere's page
        browser.visit(hemispheres_url)
        browser.find_by_text(link_text).click()

        # Find and extract URL for each full-size image
        hemisphere_html = browser.html
        soup = BeautifulSoup(hemisphere_html, 'html.parser')
        hemisphere_image_urls.append({"title": hemisphere_titles[i],
                                      "img_url": soup.find(string="Sample").findParent()["href"]})

    # Test results
    if TEST:
        for url in hemisphere_image_urls:
            for key in url:
                print(key, ":", url[key])

    # Store results in dict
    scraped_data["hemisphere_urls"] = hemisphere_image_urls

    return scraped_data
print(f"Paragraph: {news_paragraph}") # In[13]: #PL Mars Space Images - Featured Image url_image = "https://www.jpl.nasa.gov/spaceimages/?search=&category=featured#submit" browser.visit(url_image) # In[14]: #Finding image full_image_button = browser.find_by_id("full_image") full_image_button.click() # In[15]: browser.is_element_present_by_text("more info", wait_time=1) more_info_element = browser.find_link_by_partial_text("more info") more_info_element.click() # In[16]: html = browser.html
# Find and click on the 'Intern' positions from the website.
option = browser.find_by_xpath('//select[@id="com.peopleclick.cp.formdata.JPM_DURATION"]//option[@value="7"]')
option.click()

# Javascript injection to unselect the option for 'all' positions.
# Without this, the webpage will still load all the open positions from the site.
browser.execute_script('document.getElementsByTagName("select")[3].getElementsByTagName("option")[0].selected = false')

# Select the most results per page that we can display. This is meant to be a quick
# and easy method for getting the whole list of internships currently available.
browser.execute_script('document.getElementById("com.peopleclick.cp.formdata.hitsPerPage").getElementsByTagName("option")[0].selected = false')
browser.execute_script('document.getElementById("com.peopleclick.cp.formdata.hitsPerPage").getElementsByTagName("option")[3].selected = true')

# Find and click the 'Search' button from the website.
button = browser.find_by_id('sp-searchButton')
button.click()

# Pause for a bit to let things load due to potentially bad connections.
sleep(2)

# Extract the job positions as titles from the website.
positions = browser.find_by_css('div[class=" pf-sr-titleInnerWrapper"] > a')

# Extract the locations of each of the jobs.
locations = browser.find_by_css('div[class="clearfix col-xs-12 col-sm-7 col-md-8 pf-padding-left pf-rwd-titlefieldsbox"] > div > span[id="com.peopleclick.cp.fieldlabel.index_0.JPM_LOCATION_value"]')

# Extract the brief descriptions from the job posting; this does not work currently.
# descriptions = browser.find_by_css('div[class="col-xs-12 visible-xs pf-paddingzero pf-rwd-jobPostDecription pf-rwd-wordwrap"] > span[id="com.peopleclick.cp.fieldlabel.index_0.JPM_DESCRIPTION_value"] > span[class="ng-binding"]')

# We will store the relevant job data into a list of dictionaries for our data
# structure.
job_list = []
month_selector_value = soup.find_all("select")[1].find_all("option")
years = [int(year.text) for year in year_selector_value]
months = [month["value"] for month in month_selector_value]

# ========================================================================================================
# loop through each year and month
state_data = []
unemp_data = []
year_data = []
month_data = []

for year in years[-2:]:
    for month in months:
        # select year and month
        browser.find_by_id("year").select(year)
        browser.find_by_id("period").select(month)

        # click draw map
        browser.find_by_id('btn_sumbit').click()
        time.sleep(0.5)

        # table content
        html = browser.html
        soup = bs(html, "html5lib")

        # try to scrape the table info if there is any
        try:
            table = soup.find("table", {"id": "tb_data"}).find("tbody").find_all("tr")
            state = [t.find_all("td")[0].text for t in table]
            unemp = [t.find_all("td")[1].text for t in table]
def scrape():
    # Dependencies
    from splinter import Browser
    from bs4 import BeautifulSoup as bs
    import requests
    import pymongo
    import pandas as pd
    import time

    # chromebrowser set up
    executable_path = {"executable_path": "chromedriver"}
    browser = Browser("chrome", **executable_path, headless=False)

    # URLs to scrape
    url1 = 'https://mars.nasa.gov/news/'
    url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    url3 = 'https://twitter.com/marswxreport?lang=en'
    url4 = 'http://space-facts.com/mars/'
    url5 = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

    # **************************************************************************
    # NASA Mars News
    # Scrape https://mars.nasa.gov/news/ and get the latest news title and text
    # **************************************************************************

    # Nasa Mars News
    browser.visit(url1)
    time.sleep(1)

    # Retrieve the rendered page
    response = browser.html
    soup = bs(response, 'html.parser')

    # Retrieve the latest element that contains news title and news paragraph
    news_title = soup.find('div', class_='content_title').find('a').text
    news_p = soup.find('div', class_='article_teaser_body').text
    print(news_title)
    print(news_p)

    # **************************************************************************
    # JPL Mars Space Images - Featured Image
    # Scrape https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars and get the link for the largest carousel image
    # **************************************************************************

    # Access full image from JPL Mars Space Images
    browser.visit(url2)
    f1 = browser.find_by_id("full_image")
    f1.click()
    time.sleep(1)
    browser.click_link_by_partial_text('more info')

    # Retrieve the rendered page
    response2 = browser.html
    soup2 = bs(response2, 'html.parser')

    # main_url is the base prepended to the relative image path
    main_url = 'https://www.jpl.nasa.gov'
    page_url = soup2.find(class_='lede').find('a').get('href')
    featured_image_url = main_url + page_url
    print(featured_image_url)

    # **************************************************************************
    # Mars Weather
    # Scrape https://twitter.com/marswxreport?lang=en and save the weather tweets
    # **************************************************************************

    # Retrieve page with the requests module to get Mars Weather
    response3 = requests.get(url3)
    soup3 = bs(response3.text, 'html.parser')
    mars_weather_find = soup3.find_all('div', class_='js-tweet-text-container')
    mars_weather = (mars_weather_find[0].p.text)

    # **************************************************************************
    # Mars Facts
    # Scrape https://space-facts.com/mars/ and save the facts to an HTML table string
    # **************************************************************************

    # Get Mars Facts
    tables = pd.read_html(url4)[0]
    tables.columns = ('fact', 'figure')
    tables.set_index('fact', inplace=True)
    tables

    # Put Mars facts in HTML table string
    table_string = tables.to_html()
    print(table_string)

    # **************************************************************************
    # Mars Hemispheres
    # Scrape https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars and
    # save the high resolution images for each of Mars' hemispheres
    # **************************************************************************

    # Visit and get Mars Hemispheres information - URL and title of each Hemisphere
    hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser.visit(hemispheres_url)
    html_hemispheres = browser.html

    # Parse HTML with Beautiful Soup
    soup = bs(html_hemispheres, 'html.parser')

    # Retrieve all items that contain mars hemispheres information
    items = soup.find_all('div', class_='item')

    # Create empty list for hemisphere urls
    hemisphere_image_urls = []

    # Store the main url
    main_url = 'https://astrogeology.usgs.gov'

    # Loop through the items previously stored
    for i in items:
        # Store title
        title = i.find('h3').text.strip('Enhanced')

        # Store link that leads to full image website
        partial_img_url = i.find('a', class_='itemLink product-item')['href']

        # Visit the link that contains the full image website
        browser.visit(main_url + partial_img_url)

        # HTML Object
        partial_img_html = browser.html

        # Parse HTML with Beautiful Soup
        soup = bs(partial_img_html, 'html.parser')

        # Retrieve full image source
        img_url = main_url + soup.find('img', class_='wide-image')['src']

        # Append to dictionaries
        hemisphere_image_urls.append({"title": title, "img_url": img_url})

    # Create dictionary
    mars = []
    mars.append({"news_title": news_title,
                 "news_short": news_p,
                 "featured_image_url": featured_image_url,
                 "mars_weather": mars_weather,
                 "mars_facts": table_string,
                 "hemispheres_urls": hemisphere_image_urls})

    # return data
    return mars
from splinter import Browser

print 'Starting...'
browser = Browser('firefox')  # using firefox
browser.visit("http://portal.ku.edu.kw/sisapp/faces/login.jspx")
browser.fill('username', 'xxxxx')  # enter student ID
browser.fill('password', 'yyyyy')  # enter password
browser.find_by_id('loginBtn').click()  # click login
class SplinterBrowserDriver(BaseBrowserDriver):
    """
    This is a BrowserDriver for splinter (http://splinter.cobrateam.info)
    that implements the BaseBrowserDriver API.

    To use it, you must have splinter installed on your env.

    For itself it's a browser driver that supports multiple browsing
    technologies such as selenium, phantomjs, zope, etc.
    """

    driver_name = 'splinter'

    def __init__(self, *args, **kwargs):
        _args = args or (config.default_browser, )
        super(SplinterBrowserDriver, self).__init__()
        if not splinter_available:
            raise ImportError(
                "In order to use splinter Base Driver you have to install it. "
                "Check the instructions at http://splinter.cobrateam.info")
        self._browser = Browser(*_args, **kwargs)

    def _handle_empty_element_action(self, element):
        if not element:
            raise ActionNotPerformableException(
                "The action couldn't be performed because the element couldn't "
                "be found; Try checking if your element "
                "selector is correct and if the page is loaded properly.")

    @property
    def page_url(self):
        return self._browser.url

    @property
    def page_source(self):
        return self._browser.html

    @property
    def page_title(self):
        return self._browser.title

    def open_url(self, url):
        self._browser.driver.get(url)

    def close(self):
        return self._browser.driver.close()

    def quit(self):
        return self._browser.quit()

    def is_element_visible(self, element):
        return element.visible

    def get_element_text(self, element):
        return element.text

    def get_element_by_xpath(self, selector):
        return self._browser.find_by_xpath(selector)

    def get_element_by_css(self, selector):
        return self._browser.find_by_css(selector)

    def get_element_by_id(self, selector):
        return self._browser.find_by_id(selector)

    def get_element_by_tag(self, selector):
        return self._browser.find_by_tag(selector)

    @element_action
    def type(self, element, text, slowly=False):
        return element.type(text, slowly)

    @element_action
    def fill(self, element, text):
        return element.fill(text)

    @element_action
    def clear(self, element):
        self.fill(element, '')

    @element_action
    def click(self, element):
        return element.click()

    @element_action
    def choose(self, element, value):
        return element.choose(value)

    @element_action
    def select(self, element, value):
        return element.select(value)

    @element_action
    def select_by_text(self, element, text):
        return element.find_by_xpath(
            'option[normalize-space(.)="%s"]' % text).first._element.click()

    @element_action
    def check(self, element):
        return element.check()

    @element_action
    def uncheck(self, element):
        return element.uncheck()

    @element_action
    def mouse_over(self, element):
        return element.mouse_over()

    @element_action
    def mouse_out(self, element):
        return element.mouse_out()

    def reload(self):
        return self._browser.reload()

    def go_back(self):
        return self._browser.back()

    def go_forward(self):
        return self._browser.forward()

    def execute_script(self, script):
        """This method is deprecated. Use `execute_javascript` instead."""
        return self._browser.evaluate_script(script)

    def execute_javascript(self, script):
        return self._browser.evaluate_script(script)

    def get_iframe(self, iframe_id):
        return self._browser.get_iframe(iframe_id)

    def get_alert(self):
        return self._browser.get_alert()

    def attach_file(self, input_name, file_path):
        return self._browser.attach_file(input_name, file_path)

    def wait_pageload(self, timeout=30):
        wait_interval = 0.05
        elapsed = 0
        while self.execute_javascript('document.readyState') != 'complete':
            self.wait(wait_interval)
            elapsed += wait_interval
            if elapsed > timeout:
                raise PageNotLoadedException

    def click_and_wait(self, element, timeout=30):
        self.click(element)
        self.wait_pageload(timeout)

    def clear_session(self):
        self._browser.driver.delete_all_cookies()
def scrape():
    # In[3]:

    # 1.1 MARS NEWS------------------------------
    # get latest news from nasa mars exploration page at https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest
    mars_news_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

    # set up a Browser to get access to js stuff
    executable_path = {"executable_path": "/chromedriver"}
    browser = Browser("chrome", **executable_path, headless=False)

    # In[4]:

    # visit the website
    browser.visit(mars_news_url)

    # In[5]:

    nasa_news = browser.html
    soup_nasa_news = bs(nasa_news, 'html.parser')
    nasa_news_title = soup_nasa_news.find('div', class_='content_title').text.strip()
    #nasa_news_teaser = soup_nasa_news.find('div', class_="artlce_teaser_body").text.strip()
    nasa_news_teaser = soup_nasa_news.find('div', class_='article_teaser_body').text
    # .find('li', class_='slide').find('div', class_='list_text')
    # print(nasa_news_title)
    # print(nasa_news_teaser)

    # In[6]:

    # 1.2 JPL Mars space images
    # Visit the url for JPL Featured Space Image https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars.
    # Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
    # Make sure to find the image url to the full size .jpg image.
    # Make sure to save a complete url string for this image.
    nasa_image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    browser.visit(nasa_image_url)

    # In[7]:

    button = browser.find_by_id('full_image')
    button.click()

    # In[8]:

    button1 = browser.find_by_text('more info ')
    button1.click()

    # In[9]:

    featured_image_url = browser.find_link_by_partial_href('spaceimages/images')
    #jpl_image = browser.html
    #soup_jpl_image = bs(jpl_image, 'html.parser')
    #soup_jpl_image
    featured_image_url = featured_image_url['href']

    # In[10]:

    # Mars Weather
    # Visit the Mars Weather twitter account https://twitter.com/marswxreport?lang=en and scrape the latest Mars weather tweet from the page.
    # Save the tweet text for the weather report as a variable called mars_weather.
    mars_weather_url = 'https://twitter.com/marswxreport?lang=en'
    browser.visit(mars_weather_url)

    # In[14]:

    html = browser.html
    parsed_tweet = bs(html, 'html.parser')
    mars_weather = parsed_tweet.find('p', class_='tweet-text').text
    # print(mars_weather)

    # In[15]:

    # Mars Facts
    # Visit the Mars Facts webpage https://space-facts.com/mars/ and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
    # Use Pandas to convert the data to a HTML table string.
    mars_facts_url = 'https://space-facts.com/mars/'
    browser.visit(mars_facts_url)

    # In[17]:

    mars_df = pd.read_html(mars_facts_url)
    # print(mars_df)

    # In[18]:

    mars_df[1]

    # In[19]:

    mars_facts_df = mars_df[1]
    mars_facts_df = mars_facts_df.to_html()
    mars_facts_df

    # In[35]:

    # Mars Hemispheres
    # Visit the USGS Astrogeology site to obtain high resolution images for each of Mars' hemispheres.
    # You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
    # Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name.
    # Use a Python dictionary to store the data using the keys img_url and title.
    # Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.
    base_hem_html = 'https://astrogeology.usgs.gov/'  # used later
    mars_hem_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser.visit(mars_hem_url)

    # In[36]:

    html = browser.html
    hemisphere_parsed = bs(html, "html.parser")

    # In[37]:

    browser.click_link_by_partial_text('Cerberus Hemisphere Enhanced')
    # wait - i feel like there should be a "wait" command or something
    time.sleep(1)
    html = browser.html
    page_parsed = bs(html, 'html.parser')

    # In[40]:

    cerberus_image = page_parsed.find('img', class_='wide-image').get('src')
    cerberus_img_html = base_hem_html + cerberus_image
    cerberus_title = page_parsed.find('h2', class_='title').text
    # print(cerberus_img_html)
    # print(cerberus_title)

    # In[45]:

    # rinse-repeat Schiaparelli
    browser.visit(mars_hem_url)
    time.sleep(1)
    html = browser.html
    hemisphere_parsed = bs(html, "html.parser")

    # In[46]:

    browser.click_link_by_partial_text('Schiaparelli Hemisphere Enhanced')
    time.sleep(1)
    html = browser.html
    page_parsed = bs(html, 'html.parser')

    # In[47]:

    schiaparelli_image = page_parsed.find('img', class_='wide-image').get('src')
    schiaparelli_img_html = base_hem_html + schiaparelli_image
    schiaparelli_title = page_parsed.find('h2', class_='title').text
    # print(schiaparelli_img_html)
    # print(schiaparelli_title)

    # In[48]:

    # rinse-repeat Syrtis
    browser.visit(mars_hem_url)
    time.sleep(1)
    html = browser.html
    hemisphere_parsed = bs(html, "html.parser")

    # In[50]:

    browser.click_link_by_partial_text('Syrtis Major Hemisphere Enhanced')
    time.sleep(1)
    html = browser.html
    page_parsed = bs(html, 'html.parser')

    # In[51]:

    syrtis_image = page_parsed.find('img', class_='wide-image').get('src')
    syrtis_img_html = base_hem_html + syrtis_image
    syrtis_title = page_parsed.find('h2', class_='title').text
    # print(syrtis_img_html)
    # print(syrtis_title)

    # In[52]:

    # rinse-repeat Valles
    browser.visit(mars_hem_url)
    time.sleep(1)
    html = browser.html
    hemisphere_parsed = bs(html, "html.parser")

    # In[54]:

    browser.click_link_by_partial_text('Valles Marineris Hemisphere Enhanced')
    time.sleep(1)
    html = browser.html
    page_parsed = bs(html, 'html.parser')

    # In[55]:

    valles_image = page_parsed.find('img', class_='wide-image').get('src')
    valles_img_html = base_hem_html + valles_image
    valles_title = page_parsed.find('h2', class_='title').text
    # print(valles_img_html)
    # print(valles_title)

    # In[57]:

    # bring it all together in a dict
    hs_title_img_final = [
        {"title": cerberus_title, "img_src": cerberus_img_html},
        {"title": schiaparelli_title, "img_src": schiaparelli_img_html},
        {"title": syrtis_title, "img_src": syrtis_img_html},
        {"title": valles_title, "img_src": valles_img_html}
    ]
    # print(hs_title_img_final)

    # In[39]:

    # I could probably loop the above section for all hemispheres, but I can't think of how to do it at the moment
    # hs_titles = []
    # hs_urls = []
    # img_title_loc = hemisphere_parsed.find_all('a', class_='h3')
    # for x in img_title_loc:
    #     hs_title.append(hemisphere_parsed.find('h3').text)
    #     hs_urls.append(base_hem_html + hemisphere_parsed.find('a', class_='href')

    # make dictionary out of all collected data for later use in flask app
    mars_info = {"nasa_news_title": nasa_news_title,
                 "nasa_news_teaser": nasa_news_teaser,
                 "featured_image_url": featured_image_url,
                 "mars_weather_url": mars_weather_url,
                 "mars_weather": mars_weather,
                 "mars_facts_df": mars_facts_df,
                 "hs_title_img_final": hs_title_img_final}

    browser.quit()

    return mars_info
from splinter import Browser
import time
from collections import defaultdict
import json
import re

if __name__ == '__main__':
    f = open('.env', 'r')
    env = {}
    print('reading env variables')
    for line in f.readlines():
        env[line.strip().split("=")[0]] = line.strip().split("=")[1]

    print('logging into site')
    br = Browser("firefox")
    br.visit("https://www.fantasycruncher.com/login?referer=/")
    br.find_by_id('user_email').fill(env['fc_login'])
    br.find_by_id('user_password').fill(env['fc_pw'])
    br.find_by_id('submit').click()

    rewind_base = "https://www.fantasycruncher.com/lineup-rewind/draftkings/NBA/"
    date = "2018-10-29"
    br.visit(rewind_base + date)

    all_players = "/html/body/div[3]/div[1]/div[1]/div/div[2]/div[8]/div[2]/div[2]/div[2]/div[2]/div/label/select/option[7]"
    br.find_by_name("ff_length").click()
    br.find_by_xpath(all_players).click()
    tr_selector = "/html/body/div[3]/div[1]/div[1]/div/div[2]/div[8]/div[1]/div[2]/table/tbody/tr"

    print('Opening Data')
    f = open('data/empty_dates.txt', 'r')
    dates = []
    for line in f.readlines():
        dates.append(line.strip())
    #salary_data = pd.read_csv('data/salary_data_gaps.csv')
# In[ ]:

# JPL Mars Space Images - Featured Image
# Visit the url for JPL Featured Space Image here.
# Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
# Make sure to find the image url to the full size .jpg image.
# Make sure to save a complete url string for this image.
# Example: featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16225_hires.jpg'


# In[11]:

url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)
fullimage = browser.find_by_id("full_image").click()
findlink = browser.find_link_by_partial_text("more info").click()


# In[18]:

soup = bs(browser.html, "html.parser")
fimage = soup.find("figure", class_="lede")
fullimage = fimage.a.img["src"]
final_image_link = "https://www.jpl.nasa.gov" + fullimage
final_image_link


# In[29]:

mars["featured_image_url"] = final_image_link


# In[ ]:
wb = xlrd.open_workbook(filename)
sheet = wb.sheet_by_index(0)

br = Browser()
br.visit("https://www.cardmaker.net/yugioh/")

form_ids = [
    "name", "cardtype", "subtype", "attribute", "level", "trapmagictype",
    "rarity", "picture", "circulation", "set1", "set2", "type",
    "carddescription", "atk", "def", "creator", "year", "serial"
]

opener = urllib.request.build_opener()
opener.addheaders = [("User-Agent", "Mozilla/5.0")]
urllib.request.install_opener(opener)

for r in range(2, sheet.nrows):
    for c in range(1, sheet.ncols):
        form_id = form_ids[c - 1]
        form_value = sheet.cell_value(r, c)
        if not form_value:
            break
        if c == 3:
            form_value = form_value.lower()
        elif c == 5 or c == 14 or c == 15 or c == 17:
            form_value = str(int(form_value))
        try:
            br.find_by_id(form_id).first.fill(form_value)
        except Exception:
            br.find_by_id(form_id).first.select(form_value)
    br.find_by_id("generate").first.click()
    if sheet.cell_value(r, 1):
        src = br.find_by_id("card").first["src"]
        urllib.request.urlretrieve(src, sheet.cell_value(r, 1) + ".jpeg")
browser.find_by_name('submit').first.click()

# Test Case 2
print("Running Test Case 3: Visit Activities/Utilities Page")
#browser.visit('http://54.191.193.7:5000/')
print("Clicking Play Button")
time.sleep(2)
browser.find_link_by_text('Play').first.click()
print("Play button pressed...")
time.sleep(2)
assert browser.is_text_present('Activities') == True
print("=========================================================")

# Test Case 3
print("Running Test Case 4: Test Image Upload Pages")
browser.find_by_id("sportsButton").first.click()
print("Visiting sports upload image activity")
assert "selfie/sports" in browser.url
browser.back()
browser.find_by_id("vehiclesButton").first.click()
assert "selfie/vehicles" in browser.url
browser.back()
# browser.find_by_id("emojisButton").first.click()
# assert "selfie/emojis" in browser.url
# browser.back()
# browser.find_by_id("animalsButton").first.click()
# assert "selfie/animals" in browser.url

# Test Case 4:
print("Running test case 5: Test Image swipe pages accessible")
browser.find_by_id("swipeSports").first.click()
def register(activity):
    username = '******'
    password = '******'
    browser = Browser('chrome', headless=True)

    # browse the warrior index page
    # browser.visit('https://warrior.uwaterloo.ca/')
    # browse Facility Access Reservation
    # browser.visit('https://warrior.uwaterloo.ca/Program/GetProducts?classification=01e6d66f-044b-42c0-9cc9-495c9586f4db')
    # Browse the Facility Access Reservation categories
    # browser.find_by_css('.Menu-Item').first.click()

    # browse activity page directly based on the activities
    if (activity == 'gym'):
        browser.visit('https://warrior.uwaterloo.ca/Program/GetProgramDetails?courseId=cc2a16d7-f148-461e-831d-7d4659726dd1&semesterId=b0d461c3-71ea-458e-b150-134678037221')
    if (activity == 'badminton'):
        browser.visit('https://warrior.uwaterloo.ca/Program/GetProgramDetails?courseId=5f834760-8c08-4eff-8d1d-fbe01dd538f6&semesterId=b0d461c3-71ea-458e-b150-134678037221')

    # Browse the CIF FITNESS CENTER program
    # dict = {'gym': 1, 'badminton': 2}
    # browser.find_by_css('.list-group-item')[dict[activity]].click()

    # Check login status
    if browser.is_element_present_by_text('Log In'):
        print("Login required!")
        browser.execute_script("showLogin();")
        # wait for dynamic content to load
        while not browser.find_by_id("modalLogin").first.visible:
            time.sleep(.1)
        browser.execute_script("showLocalLoginForm();")

        # Fill the username and password
        print("Filling in user info...")
        while not browser.find_by_name("Username").first.visible:
            time.sleep(.1)
        while (browser.find_by_name("Username").first.value == ''):
            browser.fill('Username', username)
        while (browser.find_by_name("Password").first.value == ''):
            browser.fill('Password', password)

        # Submit and login
        browser.execute_script("submitLogin();")

        # Wait for login process to finish, then reload
        print("Logging in...")
        while browser.is_element_present_by_id("modalLogin"):
            time.sleep(.1)
        print("Login Successful!")
        browser.reload()

    # Login before the hour in case there are other toolmen
    if (datetime.now().minute == 59):
        print("Toolman stand by...")
        while (datetime.now().minute != 0):
            time.sleep(1)
        browser.reload()

    # Register the latest reservation
    resv = browser.find_by_text("Register")
    resv.last.click()

    # Accept the waiver and checkout
    print("Signing the waiver...")
    while browser.is_text_not_present("ACCEPT NOW"):
        time.sleep(.1)
    browser.find_by_text("Accept Now").first.click()
    for i in range(1, 9):
        browser.choose('CustomPrompts[{}].CommonInput'.format(i), 'False')
    browser.find_by_text("Add to Cart").first.click()
    browser.execute_script("Submit();")
    print("Registered!")
    return
def scrape(): # Create a library that holds all the Mars' Data mars_library = {} # Execute Chromedriver executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path) # URL of NASA Mars News to be scraped url_1 = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest' #Visit the page using the browser browser.visit(url_1) # assign html content html = browser.html # Create a Beautiful Soup object soup_1 = bs(html, "html.parser") # Assign the title to variables news_title = soup_1.find_all( 'div', class_='content_title')[0].find('a').text.strip() # Assign the text content to variables news_p = soup_1.find_all( 'div', class_='rollover_description_inner')[0].text.strip() # assign scrapped objects into Lib mars_library['news_title'] = news_title mars_library['news_p'] = news_p # #### Part II. PL Mars Space Images - Featured Image # URL of JPL Mars pictures to be scraped url_2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars' #Visit the JPL website browser.visit(url_2) # assign html content html = browser.html # Create a new Beautiful Soup object soup_2 = bs(html, 'html.parser') # Find and execute the full image button full_image_elem = browser.find_by_id('full_image') full_image_elem.click() # Find more picture objects by clicking on "more info" button browser.is_element_present_by_text('more info', wait_time=10) more_info_elem = browser.find_link_by_partial_text('more info') more_info_elem.click() # retrieve image's url address img_url_partial = soup_2.find_all( 'a', class_='fancybox')[0].get('data-fancybox-href').strip() # combine image url and JPL url image_url = "https://www.jpl.nasa.gov" + img_url_partial mars_library['image_url'] = image_url # #### Part III. Mars Weather # # Load URL of Mars Weather twitter account url_3 = 'https://twitter.com/marswxreport?lang=en' #Visit the Mars Weather twitter account browser.visit(url_3) # assign html content html = browser.html # Create a Beautiful Soup object soup_3 = bs(html, 'html.parser') #scrap latest Mars weather tweet mars_weather = soup_3.find_all( 'p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text' )[0].text mars_library['mars_weather'] = mars_weather # #### Part IV. Mars Facts # # URL of Mars Facts webpage to be scraped url_4 = 'https://space-facts.com/mars/' profile_table = pd.read_html(url_4) # convert table info into dataframe df = profile_table[0] # rename the columns df.columns = ['description', 'value'] #Set the index to the description column df.set_index('description', inplace=True) # Deploy the DataFrame to HTML mars_facts = df.to_html('MarsFactsTable.html') mars_library['mars_facts'] = mars_facts # #### Part V. 
Mars Hemispheres # # URL of USGS Astrogeology site url_5 = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars' #Visit USGS Astrogeology site browser.visit(url_5) # # assign html content # html = browser.html # # Create a new Beautiful Soup object # soup_5 = bs(html, 'html.parser') # # get all the titles # results = soup_5.find_all('h3') # assign image objects to a new list hemisphere_images = [] # Get a list of all of the hemisphere images links = browser.find_by_css("a.product-item h3") # Loop through all the links, find the anchor and return the "href" for i in range(len(links)): hemisphere = {} # Find the elements on each loop browser.find_by_css("a.product-item h3")[i].click() # locate image anchor tag and extract the href sample_elem = browser.find_link_by_text('Sample').first hemisphere['img_url'] = sample_elem['href'] # Get Hemisphere title hemisphere['title'] = browser.find_by_css("h2.title").text # Append hemisphere image objects to the list hemisphere_images.append(hemisphere) # navigate back browser.back() # review the saved images list hemisphere_images mars_library['hemisphere_images'] = hemisphere_images # Return Library return mars_library
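The scrape() function above never calls browser.quit(), so the Chrome driver stays alive after every run. A minimal sketch, not part of the original, of how the call could be restructured to guarantee cleanup with try/finally (the wrapper name run_scrape is hypothetical):

from splinter import Browser

def run_scrape():
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path)
    try:
        # ... perform the same visits and parsing as scrape() above, using this browser ...
        return {}
    finally:
        # Always release the driver, even if a selector raises mid-scrape
        browser.quit()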
def scrape(): mars = dict() mars_url = 'https://mars.nasa.gov/news/' response = requests.get(mars_url) soup = BeautifulSoup(response.text, 'lxml') try: news_title = soup.find("div", class_="content_title").text news_p = soup.find("div", class_="rollover_description_inner").text print("The news title is" + news_title) print("The text is" + news_p) except AttributeError as Atterror: print(Atterror) mars["title"] = news_title mars["paragraph"] = news_p space_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars' executable_path = {'executable_path': '/usr/local/bin/chromedriver'} browser = Browser('chrome', **executable_path, headless=False) browser.visit(space_url) image = browser.find_by_id('full_image') image.click() time.sleep(2) link = browser.find_link_by_partial_text('more info') link.click() soup2 = BeautifulSoup(browser.html, 'html.parser') reference = soup2.find('figure', class_='lede') final_link = reference.a['href'] featured_image_url = 'https://www.jpl.nasa.gov/' + final_link mars['featured_image_url'] = featured_image_url print(featured_image_url) twitter_url = 'https://twitter.com/marswxreport?lang=en' response3 = requests.get(twitter_url) soup3 = BeautifulSoup(response3.text, 'lxml') ##print(soup3.prettify()) weather = soup3.find( "p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" ).text mars["weather"] = weather facts_url = 'https://space-facts.com/mars/' mars_facts = pd.read_html(facts_url) mars_facts[0].rename(columns={0: "Type", 1: "Stat"}, inplace=True) marsdf = mars_facts[0] mars_html = marsdf.to_html() mars['html'] = mars_html mars_hem = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars' browser.visit(mars_hem) soup5 = BeautifulSoup(browser.html, 'html.parser') class_collap_results = soup5.find('div', class_="collapsible results") items = soup5.find('div', class_="collapsible results").find_all('div', class_='item') List = list() image_urls = list() titles = list() for i in items: title = i.h3.text titles.append(title) href = "https://astrogeology.usgs.gov" + i.find( 'a', class_='itemLink product-item')['href'] browser.visit(href) time.sleep(10) soup6 = BeautifulSoup(browser.html, 'html.parser') urls = soup6.find('div', class_='downloads').find('li').a['href'] image_urls.append(urls) hem_dict = dict() hem_dict['title'] = title hem_dict['img_url'] = urls List.append(hem_dict) mars['hemisphere_urls'] = List return mars
import random import time from splinter import Browser def Wait(): time.sleep(random.uniform(0, 2)) ExecutablePath = input("Please input your chromedriver.exe's path: ") UserName = input("Please input your username: ") Password = input("Please input your password: ") Brs = Browser("chrome", executable_path=ExecutablePath) URL = "http://www.wlxt.uestc.edu.cn/wlxt/index.aspx" Brs.visit(URL) Wait() Brs.find_by_id("hllogin").click() Wait() Brs.find_by_id("btnidaslogin").click() Wait() Brs.fill("username", UserName) Wait() Brs.fill("password", Password) Wait() # Click the "登录" ("Log in") button Brs.find_by_text("登录").click() Wait() Brs.find_by_id("dataGridStudentCourse_imgbtnEnterCourseStudy_1").click() for i in range(0, 100): Brs.visit(URL) time.sleep(random.uniform(0, 2)) Brs.find_by_id("dataGridStudentCourse_imgbtnEnterCourseStudy_1").click()
def splinter(url): browser = Browser() print('Opening browser\n') browser.visit(url) time.sleep(3) print('Selecting user type\n') browser.find_by_id('userTypeSelect').click() time.sleep(1) print('Click \"Student\"\n') browser.find_by_text('Student').click() time.sleep(1) print('Filling email and password\n') browser.find_by_id('inputEmail').fill('*****@*****.**') browser.find_by_id('inputPassword').fill('8C51B7') print('Submitting form\n') browser.find_by_id('submitForm').click() time.sleep(3) print('Continue courses\n') browser.find_link_by_href('CourseLogin.aspx?courseid=2360111').click() time.sleep(3) print('Click the lesson') browser.find_link_by_href( '/mod/homepage/view.php?id=322&continue=true').click() for i in range(1, 100): try: for h in range(4): choice = 'multichoice_' + str(i) + '_' + str(h + 1) print('Click choice: ' + choice) browser.find_by_id(choice).click() for k in range(4): unclick = 'multichoice_' + str(i) + '_' + str(k + 1) browser.find_by_id(unclick).click() time.sleep(1) browser.find_by_value('Check Response').click() time.sleep(1) try: browser.find_by_id('nextnavbutton').click() print('Correct') except: print('Not correct') browser.find_by_id(unclick).click() except: for j in range(4): choice = 'multichoice_' + str(i) + '_' + str(j + 1) print('Looking for id: ' + choice) browser.find_by_id(choice).click() time.sleep(1) browser.find_by_value('Check Response').click() time.sleep(1) try: browser.find_by_id('nextnavbutton').click() except: print('Wrong choice') print('Done or not a multi choice question') finally: print('Countdown started: 60sec') time.sleep(60) print('Trying to click \"next\"') browser.find_by_id('nextnavbutton').click()
def scrape_data(): executable_path = {"executable_path": "chromedriver.exe"} browser = Browser("chrome", **executable_path, headless=False) #Visiting the NASA website for news url = "http://mars.nasa.gov/news/" browser.visit(url) time.sleep(5) #We will convert the html from the browser to a soup object html = browser.html soup = BeautifulSoup(html, "html.parser") element = soup.select_one("li.slide") #Find Title of the page under "div" and class of "content_title" and save it into titles titles = element.find("div", class_="content_title").get_text() #Find the article text using "div" with class of "article_teaser_body" and save it into article article = element.find("div", class_="article_teaser_body").get_text() #Visit the Mars Image website and open it using chromedriver url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars" browser.visit(url) time.sleep(1) #Use Splinter to navigate the page and find the current Featured Mars Image image_element = browser.find_by_id("full_image") image_element.click() #Click on "more info" button browser.is_element_present_by_text("more info") more_info_element = browser.find_link_by_partial_text("more info") more_info_element.click() #Use Beautiful soup and parser to parse the html page html = browser.html soup_image = BeautifulSoup(html, "html.parser") image_url = soup_image.select_one("figure.lede a img").get("src") featured_image_url = f"https://www.jpl.nasa.gov{image_url}" featured_image_url #Visit the Mars weather Twitter to scrape Mars weather Info url = "https://twitter.com/marswxreport?lang=en" browser.visit(url) time.sleep(1) #Use Beautiful soup and parser to parse the html page html = browser.html soup_weather = BeautifulSoup(html, "html.parser") #Find the tweet with "Mars Weather" using soup.find_all tweet = soup_weather.find_all("article", role="article")[0] #Save the weather data into mars_weather variable mars_weather = tweet.find_all("span")[4].text #Use Pandas to read html dataframe #For some reason, I kept getting errored out. I could not find a fix, #so I hard coded the info myself since the info is unlikely to change. # url = "https://space-facts.com/mars/" # browser.visit(url) # df = pd.read_html(url) # df.columns = ["Facts", "Measurements"] # df.set_index("Facts", inplace=True) # df data = [["Equatorial Diameter:", "6,792 km"], ["Polar Diameter:", "6,752 km"], ["Mass:", "6.39 × 10^23 kg (0.11 Earths)"], ["Moons:", "2 (Phobos & Deimos)"], ["Orbit Distance:", "227,943,824 km(1.38 AU)"], ["Orbit Period:", "687 days (1.9 years)"], ["Surface Temperature:", "-87 to -5 °C"], ["First Record:", "2nd millennium BC"], ["Recorded By:", "Egyptian astronomers"]] df = pd.DataFrame(data, columns=["Facts", "Measurements"]) df_html = df.to_html() #Visit the astrogeology.usgs.gov website and open it using chromedriver url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars" browser.visit(url) time.sleep(1) #Use Beautiful soup and parser to parse the html page html = browser.html soup_hemisphere = BeautifulSoup(html, "html.parser") #Scrape the page details hemisphere_element = soup_hemisphere.find_all("div", class_="item") #Create Empty list to store the image urls hemisphere_image_urls = [] #The href tag is missing the base Url link that comes before the contents in href url. #Therefore, we should add the base url to the href url before storing it into our empty list. 
base_url = "https://astrogeology.usgs.gov" #We will loop through each of the divs to store the relevant information into our empty list for image in hemisphere_element: #find the titles and save it into a variable title = image.find("h3").text #We will get the links under href tag and store into a variable added_url = image.find("a", class_="itemLink product-item")["href"] #Visit the Url browser.visit(base_url + added_url) #Use Beautiful soup and parser to parse the html page html = browser.html soup = BeautifulSoup(html, "html.parser") #Get the image url by going under src tag and adding it to the base url image_url = base_url + soup.find("img", class_='wide-image')["src"] #Append the our url list hemisphere_image_urls.append({"title": title, "image_url": image_url}) hemisphere_image_urls #Quit the Browser browser.quit() final_data = { "news_title": title, "news_article": article, "featured_image": featured_image_url, "mars_weather": mars_weather, "mars_facts": df_html, "mars_hemisphere": hemisphere_image_urls } return final_data
'11 September 2020', '18 September 2020', '25 September 2020' ] while (t != 15): print("Current time: " + time.asctime(time.localtime(time.time())) + "\n") a = Browser('chrome') a.driver.set_window_size(640, 1480) try: a.visit('https://www.delta.com/') except: print('exception occurred') a.quit() continue Open = a.find_by_id('mobile-expand-widg') Open.click() From = a.find_by_id('fromAirportName') From.click() FromBar = a.find_by_id('search_input') FromBar.click() active_web_element = a.driver.switch_to.active_element i = 0 time.sleep(1) while i != 3: active_web_element.send_keys(Keys.BACKSPACE) i += 1 time.sleep(1)
def scrape(): # ------------------------------------------------ Mars News ---------------------------------------------------- # URL of page to be scraped url = "https://mars.nasa.gov/news/" response = requests.get(url) soup = BeautifulSoup(response.text, 'html.parser') # Find latest news title about Mars news_title = soup.find('div', class_="content_title").text news_title # Find latest news blurb news_p = soup.find('div', class_="rollover_description_inner").text news_p # ------------------------------------------------ Featured Image ---------------------------------------------------- # * Use splinter to navigate the site and find the image url for the current Featured Mars Image executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path) url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars' browser.visit(url) featured_image = browser.find_by_id('full_image') featured_image.click() time.sleep(2) more_info = browser.find_link_by_partial_text('more info') more_info.click() # Pull featured image url html = browser.html soupsearch = BeautifulSoup(html, 'html.parser') part_image_url = soupsearch.find('img', class_='main_image').get('src') featured_image_url = 'https://www.jpl.nasa.gov' + part_image_url featured_image_url # Exit browser browser.quit() # ------------------------------------------------ Mars Twitter ---------------------------------------------------- # Visit the Mars Weather twitter account [here](https://twitter.com/marswxreport?lang=en) # and scrape the latest Mars weather tweet from the page. url = "https://twitter.com/marswxreport?lang=en" response = requests.get(url) soup = BeautifulSoup(response.text, 'html.parser') mars_weather = soup.find('div', class_='js-tweet-text-container').text mars_weather # ------------------------------------------------ Mars Facts ---------------------------------------------------- # # Pull Mars facts table from Space-Facts executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path) url = 'https://space-facts.com/mars/' marsFacts_df = pd.read_html(url) marsFacts_df = marsFacts_df[0] marsFacts_df # # * Use Pandas to convert the data to a HTML table string. 
# marsFacts_df.to_html('mars_facts.html', index=False) marsHTML = marsFacts_df.to_html(index=False, header=None) print(marsHTML) browser.quit() # ------------------------------------------------ Mars Hemispheres ---------------------------------------------------- executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path) url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars' browser.visit(url) cerberus = browser.find_link_by_partial_text('Cerberus') cerberus.click() html = browser.html soupsearch = BeautifulSoup(html, 'html.parser') astrogeology_url = 'https://astrogeology.usgs.gov/' #--------------------------------------- cerberus_url = soupsearch.find('img', class_='wide-image').get('src') cerberus_img_url = astrogeology_url + cerberus_url print('cerberus image') print(cerberus_img_url) back = browser.find_link_by_partial_text('Back') back.click() #--------------------------------------- schiaparelli = browser.find_link_by_partial_text('Schiaparelli') schiaparelli.click() html = browser.html soupsearch = BeautifulSoup(html, 'html.parser') schiaparelli_url = soupsearch.find('img', class_='wide-image').get('src') schiaparelli_img_url = astrogeology_url + schiaparelli_url back = browser.find_link_by_partial_text('Back') back.click() #--------------------------------------- syrtis = browser.find_link_by_partial_text('Syrtis') syrtis.click() html = browser.html soupsearch = BeautifulSoup(html, 'html.parser') syrtis_url = soupsearch.find('img', class_='wide-image').get('src') syrtis_img_url = astrogeology_url + syrtis_url back = browser.find_link_by_partial_text('Back') back.click() valles = browser.find_link_by_partial_text('Valles') valles.click() html = browser.html soupsearch = BeautifulSoup(html, 'html.parser') valles_url = soupsearch.find('img', class_='wide-image').get('src') valles_img_url = astrogeology_url + valles_url valles_img_url # Save hemisphere image urls in a dictionary. hemisphere_image_urls = [ {"title": "Valles Marineris Hemisphere", "img_url": valles_img_url}, {"title": "Cerberus Hemisphere", "img_url": cerberus_img_url}, {"title": "Schiaparelli Hemisphere", "img_url": schiaparelli_img_url}, {"title": "Syrtis Major Hemisphere", "img_url": syrtis_img_url}, ] # print(hemisphere_image_urls) # Close out of browser browser.quit() # ------------------------------------------------ Full Mars ---------------------------------------------------- # Save all variables in a dictionary mars_data = { "hemisphere_image_urls": hemisphere_image_urls, "news_p" : news_p, "news_title" : news_title, "featured_image_url": featured_image_url, "mars_weather": mars_weather, "mars_facts": marsHTML } return mars_data
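The four hemisphere blocks in the function above differ only in the link text that gets clicked, so they could be collapsed into a loop; a hedged sketch reusing the same Splinter and BeautifulSoup calls (link texts and titles taken from the blocks above):

hemisphere_image_urls = []
pairs = [("Cerberus", "Cerberus Hemisphere"),
         ("Schiaparelli", "Schiaparelli Hemisphere"),
         ("Syrtis", "Syrtis Major Hemisphere"),
         ("Valles", "Valles Marineris Hemisphere")]
for link_text, title in pairs:
    browser.find_link_by_partial_text(link_text).click()
    soupsearch = BeautifulSoup(browser.html, 'html.parser')
    img_url = astrogeology_url + soupsearch.find('img', class_='wide-image').get('src')
    hemisphere_image_urls.append({"title": title, "img_url": img_url})
    browser.find_link_by_partial_text('Back').click()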
for row in range(0, num): # controls from which row to start reading data hid = rawdata.row_values(row)[0] print(hid) if i == 0: token=token1 else: token=token2 weburl = "http://webresource.123kanfang.com/31test-403/1228/studioClient4/client.html?v=2020121101&noCache=true&hid=" + hid + "&domain=//vrhouse.oss-cn-shanghai.aliyuncs.com/&token="+ token + "&vconsole=1&clearCache=1607662144149" browser = Browser('chrome') # open a Chrome browser browser.visit(weburl) print(weburl) time.sleep(5) browser.find_by_id('goNextBtn').click() data = { "packageId": hid, "isFinished": "true", "Authorization": token1 } time.sleep(5) r = requests.post(url, data=data) res = json.loads(r.text) print(res) try: if res['state'] == 200: print(hid + " published successfully") Hid[0].append(hid) browser.quit()
#book = book_list.div.a.img["alt"] #print(book) # # Visit the NASA Mars News site: Image Scraping - module 10.3.4 #visit url url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars' browser.visit(url) # Find and click the full image button full_image_elem = browser.find_by_id('full_image') full_image_elem.click() #Find the more info button and click browser.is_element_present_by_text('more info', wait_time=1) more_info_elem = browser.links.find_by_partial_text('more info') more_info_elem.click() #Parse the resulting html with soup html = browser.html img_soup = soup(html, 'html.parser') #Find the relative image url img_url_rel = img_soup.select_one('figure.lede a img').get('src') img_url_rel #Use the base url to create an absolute url
change_brig(screenshot) crop_img("captcha.png") img = threshold("captcha.png") captcha = tesseract(img) #time.sleep(2) print captcha bro.fill('usuario','J311968199') bro.fill('contrasenia','J-311968199a') bro.fill('captcha', str(captcha)) bro.find_by_id('btnLoginSisap').click() flag = False while not flag: ejecutar() principal_menu = bro.find_by_id("principal-menu") if principal_menu != []: principal_menu.click() bro.click_link_by_href("/informacion-general/informacion-seniat") bro.click_link_by_href("#inf_accionistas") bro.click_link_by_href("/accionistas/gestion") bro.select("id_tipo_relacion_empresa", "526") bro.select("id_pais","229") bro.fill("correo", "*****@*****.**") bro.fill("cantidad_acciones","1234") #bro.find_by_id("btnAccionistas").mouse_over() flag = True #ipdb.set_trace()
def mars_scrape(): executable_path = {"executable_path": "C:\chromedriver.exe"} browser = Browser("chrome", **executable_path, headless=False) #Function to scrape all necessary information from mars related websites #Create empty dictionay to store all the mars information. mars_info_dict = dict() ## Part 1. ### NASA Mars News ## --------------------------------------------------- #Define url and browse the site using chrome. url = 'https://mars.nasa.gov/news/' #browser = init_browser() browser.visit(url) html = browser.html #create soup object and use beautiful soup to parse html. soup1 = BeautifulSoup(html, 'html.parser') #print(soup.prettify()) #* Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars). #* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a #* variable called `featured_image_url`. Make sure to find the image url to the full size `.jpg` image. #* Make sure to save a complete url string for this image. url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars' browser.visit(url2) full_image = browser.find_by_id('full_image') full_image.click() # Scrape information from html for https://mars.nasa.gov/news/. class is "content_title' for news title. try: result_title = soup1.find('ul', class_="item_list").find( 'li', class_="slide").find('div', class_="content_title").text #Class is class="article_teaser_body" for para text. news_body = soup1.find('ul', class_="item_list").find( 'li', class_="slide").find('div', class_="article_teaser_body").text print("The news title is " + result_title) #print(f"The news_title is: {news_title}") print("The news body is " + news_body) #print(f"The News Body is: {news_body}") except AttributeError as Atterror: print(Atterror) #Append results from part 1 into the final mars_info dictionary. mars_info_dict["Mars_news_title"] = result_title mars_info_dict["Mars_news_body"] = news_body print(mars_info_dict) ## end of part 1 to retrieve news title and a news body. ## Part 2. ### JPL Mars Space Images - Featured Image ## --------------------------------------------------- #click on the link for "more info" time.sleep(5) link_more_info = browser.find_link_by_partial_text('more info') link_more_info.click() #Retrieve the html from the page. Parse htnl using bs4 and find the path for the full size image. fullimg_html2 = browser.html soup2 = BeautifulSoup(fullimg_html2, "html.parser") fullimg_href = soup2.find('figure', class_='lede').a['href'] featured_image_url = "https://www.jpl.nasa.gov" + fullimg_href print(featured_image_url) #Append featured image url to the Mars dictionary. mars_info_dict["Mars_featured_image_url"] = featured_image_url print(mars_info_dict) ## Part 3 . ### Mars Weather tweet ## ------------------------------- ##* Visit the Mars Weather twitter account [here](https://twitter.com/marswxreport?lang=en) and scrape the latest Mars weather ## tweet from the page. Save the tweet text for the weather report as a variable called `mars_weather`. url3 = 'https://twitter.com/marswxreport?lang=en' browser.visit(url3) html3 = browser.html soup3 = BeautifulSoup(html3, 'html.parser') #print(soup.prettify()) #Save the tweet text for the weather report as a variable called `mars_weather`. mars_weather = soup3.find( 'p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" ).text mars_weather #Add weather tweet to the mars_info dict. 
mars_info_dict["Mars_tweet_weather"] = mars_weather print(mars_info_dict) # # Part 4. ### Mars Facts ## ------------------------- # Visit the Mars Facts webpage [here](http://space-facts.com/mars/) and use Pandas to scrape the table containing facts # about the planet including Diameter, Mass, etc. # Use Pandas to convert the data to a HTML table string url4 = "http://space-facts.com/mars/" df_marsfacts_all = pd.read_html(url4) df_marsfacts = df_marsfacts_all[1] #df_marsfacts # Provide appropriate column names for the dataframe. df_marsfacts.columns = ['Mars_Facts', 'Values'] #convert to html df_marsfacts.to_html("mars_facts.html", index=False) #set index for better retrieval. df_marsfacts.set_index("Mars_Facts") #Add another html version of the Mars facts tables. mars_facts_html = df_marsfacts.to_html( classes="mars_facts table table-striped") mars_info_dict["Mars_facts_table"] = mars_facts_html print(mars_info_dict) # # Part 5. ### Mars Hemispheres #------------------------------- # Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) # to obtain high resolution images for each of Mar's hemispheres. # You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image. # Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the # hemisphere name. # Use a Python dictionary to store the data using the keys `img_url` and `title`. append the dictionary with the image url # string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere. # use splinter and soup to retrieve html and convert to soup object. url5 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/" browser.visit(url5) time.sleep(10) html5 = browser.html soup5 = BeautifulSoup(html5, "html.parser") #parse soup object for images of the 4 hemispheres . class_collap_results = soup5.find('div', class_="collapsible results") hemis_items = class_collap_results.find_all('div', class_='item') #hemis_items #loop thru to find tile and the image urls to append to relevant lists. hemis_img_urls_list = list() img_urls_list = list() title_list = list() for h in hemis_items: #save title h_title = h.h3.text title_list.append(h_title) # find the href link. h_href = "https://astrogeology.usgs.gov" + h.find( 'a', class_='itemLink product-item')['href'] #print(h_title,h_href) #browse the link from each page browser.visit(h_href) time.sleep(5) #Retrieve the image links and store in a list. html5 = browser.html soup_img = BeautifulSoup(html5, 'html.parser') h_img_url = soup_img.find('div', class_='downloads').find('li').a['href'] #print("h_img_url" + h_img_url) img_urls_list.append(h_img_url) # create a dictionary with each image and title and append to a list. hemispheres_dict = dict() hemispheres_dict['title'] = h_title hemispheres_dict['img_url'] = h_img_url hemis_img_urls_list.append(hemispheres_dict) print(hemis_img_urls_list) print(title_list) print(img_urls_list) #print(len(hemis_img_urls_list)) #Add hemispheres list to the mars_info dictionary. mars_info_dict["Hemisphere_image_urls"] = hemis_img_urls_list print(mars_info_dict) #Generate date time and store in the dictionary. cur_datetime = datetime.datetime.utcnow() mars_info_dict["Date_time"] = cur_datetime print(mars_info_dict) #Return final dictionary with all the mars information that was scraped in the 5 steps above. 
print("just before final return of mars_info_dict") mars_return_dict = { "News_Title": mars_info_dict["Mars_news_title"], "News_Summary": mars_info_dict["Mars_news_body"], "Featured_Image": mars_info_dict["Mars_featured_image_url"], "Weather_Tweet": mars_info_dict["Mars_tweet_weather"], "Facts": mars_facts_html, "Hemisphere_Image_urls": hemis_img_urls_list, "Date": mars_info_dict["Date_time"], } return mars_return_dict # End of Main scrape function # Mainline code to test function mars_scrape() #mars_data_result = mars_scrape() #pprint(mars_data_result) #mars_scrape()
from splinter import Browser import time a = 0 for i in range(100000): browser = Browser('firefox') url = 'http://adf.ly/1XoGv5/' browser.cookies.add({ 'Cookie': '__cfduid=d65fca672e4ed745194d2af98aabb8efe1457097429; FLYSESSID=84eae741895b01015a60a626a97468d73e5ad96d; adf1=24e38bdc2ec8454845bbb3746f1d0fbc; adf2=71e391e015735904abe1a112e3cfb7fa; __utma=255621336.866444478.1457097430.1457097430.1457097430.1; __utmb=255621336.0.10.1457097430; __utmc=255621336; __utmz=255621336.1457097430.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)' }) browser.visit(url) time.sleep(10) button = browser.find_by_id('skip_ad_button') button.click() window = browser.windows.current window.close() a = a + 1 print('Completed ' + str(a) + ' requests')
from splinter import Browser import re, sys # Ask user to input the 16-digit receipt number and format the entry data string rn = raw_input('Receipt Number: ') if not re.match(r"\d{16}", rn): print 'Receipt number must be a 16-digit number.' exit() rns = [rn[i:i + 3] for i in range(0, len(rn), 3)] b = Browser('phantomjs') sys.stdout.write('.') sys.stdout.flush() b.visit('https://tellburgerking.com.cn/') b.find_by_id('NextButton').click() sys.stdout.write('.') sys.stdout.flush() b.fill('CN1', rns[0]) b.fill('CN2', rns[1]) b.fill('CN3', rns[2]) b.fill('CN4', rns[3]) b.fill('CN5', rns[4]) b.fill('CN6', rns[5]) b.find_by_id('NextButton').click() sys.stdout.write('.') sys.stdout.flush() for e in b.find_by_xpath( "//div[@class='Opt2 rbloption']/span/span[@class='radioBranded']"):
def post_resume(login, passw, sity): sity = unicode(sity) browser = Browser(user_agent=useragent) browser.visit('http://hh.ru/auth/employer') browser.fill('username', login) browser.fill('password', passw) time.sleep(1) browser.find_by_xpath( '//*[starts-with(@class,"HH-SimpleValidation-Submit")]').first.click() time.sleep(3) #browser.click_link_by_href('hh.ru/employer/vacancy.do') #browser.visit('http://hh.ru/employer/vacancy.do') browser.find_by_xpath( '//*[starts-with(@href,"/employer/vacancy.do")]').first.click() time.sleep(3) try: browser.find_by_xpath( '//*[starts-with(@class,"newpopup__button")]').first.click() browser.find_by_xpath( '//*[starts-with(@class,"newpopup__closeable")]').first.click() except: pass v_name = u'Стажер-разработчик Python (OpenERP)' v_desc = u""" Обязанности: Программирование OpenERP Требования: Опыт работы с Python, Опыт работы с реляционными СУБД Условия: Удаленное обучение. Работа постоянная, удаленная, сдельная. Для стажера сумма вознаграждения по результатам собеседования.. Подробнее http://arterp.ru/vacancy-openerp-trainee/ """ browser.fill('name', v_name) browser.fill('areaName', sity) browser.choose('scheduleId', '3') browser.choose('employmentId', '1') browser.find_by_xpath( '//*[starts-with(@class,"b-forma-text")]').first.select("1") browser.find_by_id('HH-specializationChooser-checkbox_50').first.check() #Stage browser.find_by_id('HH-specializationChooser-checkbox_172').first.check() frame = browser.find_by_xpath( '//*[starts-with(@class,"jsxComponents-Editor-Frame")]')[0] print frame.value print frame['class']
def scrape__(): # set path for chrome broswer to open a blank chrome page executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path, headless=True) # set up empty dicts to append mars_data = {} # .___ ___. ___ .______ _______. .__ __. ___________ __ ____ _______. # | \/ | / \ | _ \ / | | \ | | | ____\ \ / \ / / / | # | \ / | / ^ \ | |_) | | (----` | \| | | |__ \ \/ \/ / | (----` # | |\/| | / /_\ \ | / \ \ | . ` | | __| \ / \ \ # | | | | / _____ \ | |\ \----.----) | | |\ | | |____ \ /\ / .----) | # |__| |__| /__/ \__\ | _| `._____|_______/ |__| \__| |_______| \__/ \__/ |_______/ # # use splinter and browser to connect to nasa website nasa_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest' browser.visit(nasa_url) time.sleep(2) # read the html html = browser.html soup = bs(html, 'html.parser') # search the most recent post for title and text news_title = soup.find("div", class_="content_title").text news_paragraph = soup.find("div", class_="article_teaser_body").text # add this data to mars_data dict mars_data["news_title"] = news_title mars_data["news_paragraph"] = news_paragraph # .___ .__. ___ .______ _______. __ .___ ___. ___ _______ _______ # | \/ | / \ | _ \ / | | | | \/ | / \ / _____|| ____| # | \ / | / ^ \ | |_) | | (----` | | | \ / | / ^ \ | | __ | |__ # | |\/| | / /_\ \ | / \ \ | | | |\/| | / /_\ \ | | |_ | | __| # | | | | / _____ \ | |\ \----.----) | | | | | | | / _____ \ | |__| | | |____ # |__| |__| /__/ \__\ | _| `._____|_______/ |__| |__| |__| /__/ \__\ \______| |_______| # connects to jpl.nasa url image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars" browser.visit(image_url) time.sleep(2) # clicks on the full image button browser.find_by_id('full_image').first.click() # opens all of the the html for the page as a big block, non prettify-able image_html = browser.html # reads through the html on the page, is prettify-able soup = bs(image_html, "html.parser") # find the specific tag and class for the image I am looking for # featured_image_url = image_url + soup.find("img", class_="fancybox-image")["src"] featured_image_url = image_url + soup.find( "a", class_="fancybox")["data-fancybox-href"] # add this data to mars_data dict mars_data["featured_image_url"] = featured_image_url # .___ ___. ___ .______ _______. ____ __ ____ _______ ___ .___________. __ __ _______ .______ # | \/ | / \ | _ \ / | \ \ / \ / / | ____| / \ | || | | | | ____|| _ \ # | \ / | / ^ \ | |_) | | (----` \ \/ \/ / | |__ / ^ \ `---| |----`| |__| | | |__ | |_) | # | |\/| | / /_\ \ | / \ \ \ / | __| / /_\ \ | | | __ | | __| | / # | | | | / _____ \ | |\ \----.----) | \ /\ / | |____ / _____ \ | | | | | | | |____ | |\ \----. # |__| |__| /__/ \__\ | _| `._____|_______/ \__/ \__/ |_______/__/ \__\ |__| |__| |__| |_______|| _| `._____| # set path to mars weather report Twitter page weather_url = "https://twitter.com/MarsWxReport?lang=en" browser.visit(weather_url) time.sleep(2) # read the html html = browser.html soup = bs(html, 'html.parser') # find the paragraph tab, mars_soup = soup.find_all("p", class_="TweetTextSize") weather_list = [] for weather in mars_soup: if re.search("Sol ", weather.text): weather_list.append(weather.text) # pull just the first weather report from the list mars_weather = weather_list[0] # add this data to mars_data dict mars_data["mars_weather"] = mars_weather # .___ ___. ___ .______ _______. _______ ___ ______ .___________. 
_______. # | \/ | / \ | _ \ / | | ____| / \ / || | / | # | \ / | / ^ \ | |_) | | (----` | |__ / ^ \ | ,----'`---| |----` | (----` # | |\/| | / /_\ \ | / \ \ | __| / /_\ \ | | | | \ \ # | | | | / _____ \ | |\ \----.----) | | | / _____ \ | `----. | | .----) | # |__| |__| /__/ \__\ | _| `._____|_______/ |__| /__/ \__\ \______| |__| |_______/ # set path to website url = "https://space-facts.com/mars/" time.sleep(2) # read the table at the url destination tables = pd.read_html(url) tables[0] # set up dataframe df = tables[0] df.columns = ["Categories", "Measurements"] df.set_index(["Categories"]) # convert html to df html_table = df.to_html() #replace all the \n with an empty space instead html_table.replace('\n', '') # save table as html # df.to_html("table.html") mars_data["html.table"] = "html_table" # __ __ _______ .___ ___. __ _______..______ __ __ _______ .______ _______ _______. # | | | | | ____|| \/ | | | / || _ \ | | | | | ____|| _ \ | ____| / | # | |__| | | |__ | \ / | | | | (----`| |_) | | |__| | | |__ | |_) | | |__ | (----` # | __ | | __| | |\/| | | | \ \ | ___/ | __ | | __| | / | __| \ \ # | | | | | |____ | | | | | | .----) | | | | | | | | |____ | |\ \----.| |____.----) | # |__| |__| |_______||__| |__| |__| |_______/ | _| |__| |__| |_______|| _| `._____||_______|_______/ # set up an empty list to append {title, img_url} dicts to hemispheres_list = [] # VALLES MARINERIS # set up chrome driver executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path, headless=True) # set up connection to url and click on link hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars" browser.visit(hemisphere_url) browser.click_link_by_partial_text("Schiaparelli Hemisphere Enhanced") # use BeautifulSoup to parse html data html = browser.html soup = bs(html, "html.parser") # set up link to html path valles_link = soup.find('div', 'downloads').a['href'] # set up dictionary with title and img_url valles_marineris = { "title": "Valles Marineris Hemisphere", "img_url": valles_link } # append dict to hemispheres list hemispheres_list.append(valles_marineris) # CERBERUS HEMISPHERE # set up chrome driver executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path, headless=True) # set up connection to url and click on link hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars" browser.visit(hemisphere_url) browser.click_link_by_partial_text("Cerberus Hemisphere Enhanced") # use BeautifulSoup to parse html data html = browser.html soup = bs(html, "html.parser") # set up link to html path cerberus_link = soup.find('div', 'downloads').a['href'] # set up dictionary with title and img_url cerberus = {"title": "Cerberus Hemisphere", "img_url": cerberus_link} # append dict to hemispheres list hemispheres_list.append(cerberus) #SCHIAPARELLI HEMISPHERE # set up chrome driver executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path, headless=True) # set up connection to url and click on link hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars" browser.visit(hemisphere_url) browser.click_link_by_partial_text("Cerberus Hemisphere Enhanced") # use BeautifulSoup to parse html data html = browser.html soup = bs(html, "html.parser") # set up link to html path schiaparelli_link = soup.find('div', 'downloads').a['href'] # set up dictionary with 
title and img_url schiaparelli = { "title": "Schiaparelli Hemisphere", "img_url": schiaparelli_link } # append dict to hemispheres list hemispheres_list.append(schiaparelli) # SYRTIS MAJOR HEMISPHERE # set up chrome driver executable_path = {'executable_path': 'chromedriver.exe'} browser = Browser('chrome', **executable_path, headless=True) # set up connection to url and click on link hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars" browser.visit(hemisphere_url) browser.click_link_by_partial_text("Syrtis Major Hemisphere Enhanced") # use BeautifulSoup to parse html data html = browser.html soup = bs(html, "html.parser") # set up link to html path syrtis_link = soup.find('div', 'downloads').a['href'] # set up dictionary with title and img_url syrtis = {"title": "Syrtis Major Hemisphere", "img_url": syrtis_link} # append dict to hemispheres list hemispheres_list.append(syrtis) mars_data["hemispheres_list"] = hemispheres_list return mars_data
class BugTestCase(LiveServerTestCase): """ A set of tests to check the existence of bugs. """ def setUp(self): self.browser = Browser() signup(self, 'bob', '*****@*****.**', 'bob_secret') signin(self, 'bob', 'bob_secret') def tearDown(self): logout(self) self.browser.quit() Graph.objects.get(name="Bob's graph").destroy() def test_node_rel_count_one(self): ''' This test show that reflexive outgoing `relationships` don't count if there are more relationships. ''' real_nodes = 0 real_rels = 0 create_graph(self) create_schema(self) # Creating a nodetype: "First" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("First") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() # Creating another nodetype: "Second" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("Second") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() self.browser.find_by_id('dataMenu').first.click() # Creating an allowed relationship: "First -> First" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToFirst') self.browser.select('target', '1') self.browser.find_by_value('Save Type').first.click() # Creating an allowed relationship: "First -> Second" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToSecond') self.browser.select('target', '2') self.browser.find_by_value('Save Type').first.click() # Creating a node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating a node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second3") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # 
Editing the "First1" node self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption list']")[0].click() self.browser.find_by_xpath( "//td[@class='dataList']/a[@class='edit']").first.click() # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() # Adding the relationships self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First1" self.browser.find_by_value('Save First').first.click() # Checking the counts self.browser.find_link_by_href('/graphs/bobs-graph/').first.click() nodes = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-nodes']" ).first.value rels = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-relationships']" ).first.value self.assertEqual(str(real_nodes) + " nodes", nodes) self.assertEqual(str(real_rels) + " relationships", rels) def test_node_rel_count_two(self): ''' This test shows that new `nodes` with relationships don't count. 
''' real_nodes = 0 real_rels = 0 create_graph(self) create_schema(self) # Creating a nodetype: "First" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("First") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() # Creating another nodetype: "Second" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("Second") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() self.browser.find_by_id('dataMenu').first.click() # Creating an allowed relationship: "First -> First" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToFirst') self.browser.select('target', '1') self.browser.find_by_value('Save Type').first.click() # Creating an allowed relationship: "First -> Second" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToSecond') self.browser.select('target', '2') self.browser.find_by_value('Save Type').first.click() # Creating a node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating a node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second3") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Editing the "First1" node self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption list']")[0].click() self.browser.find_by_xpath( "//td[@class='dataList']/a[@class='edit']").first.click() # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() # Adding the relationships 
self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First1" self.browser.find_by_value('Save First').first.click() # Creating another node of the "First" type with relationships self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First3") # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() # Adding the relationships self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First3" self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Checking the counts self.browser.find_link_by_href('/graphs/bobs-graph/').first.click() nodes = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-nodes']" ).first.value rels = self.browser.find_by_xpath( 
"//div[@class='flags-block']/span[@class='graph-relationships']" ).first.value self.assertEqual(str(real_nodes) + " nodes", nodes) self.assertEqual(str(real_rels) + " relationships", rels) def test_node_rel_count_three(self): ''' This test show that reflexive outgoing `relationships` DO count if there are NO more relationships. ''' real_nodes = 0 real_rels = 0 create_graph(self) create_schema(self) # Creating a nodetype: "First" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("First") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() # Creating another nodetype: "Second" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("Second") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() self.browser.find_by_id('dataMenu').first.click() # Creating an allowed relationship: "First -> First" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToFirst') self.browser.select('target', '1') self.browser.find_by_value('Save Type').first.click() # Creating an allowed relationship: "First -> Second" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToSecond') self.browser.select('target', '2') self.browser.find_by_value('Save Type').first.click() # Creating a node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating a node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second3") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Editing the "First1" node self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption 
list']")[0].click() self.browser.find_by_xpath( "//td[@class='dataList']/a[@class='edit']").first.click() # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() # Adding the relationships self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First1" self.browser.find_by_value('Save First').first.click() # Creating another node of the "First" type with relationships self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First3") # Adding more "FirstToFirst" outgoing relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[0].click() # Adding the relationships self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First3" self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Checking the counts self.browser.find_link_by_href('/graphs/bobs-graph/').first.click() nodes = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-nodes']" ).first.value rels = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-relationships']" ).first.value self.assertEqual(str(real_nodes) + " nodes", nodes) self.assertEqual(str(real_rels) + " relationships", rels) def test_node_rel_count_four(self): ''' This test show that when there are reflexive incoming `relationships` only count those. 
''' real_nodes = 0 real_rels = 0 create_graph(self) create_schema(self) # Creating a nodetype: "First" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("First") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() # Creating another nodetype: "Second" self.browser.find_link_by_href( '/schemas/bobs-graph/types/create/').first.click() self.browser.find_by_name('name').first.fill("Second") self.browser.find_by_name('properties-0-key').first.fill('Name') self.browser.find_by_name('properties-0-display').first.check() self.browser.find_by_value('Save Type').first.click() self.browser.find_by_id('dataMenu').first.click() # Creating an allowed relationship: "First -> First" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToFirst') self.browser.select('target', '1') self.browser.find_by_value('Save Type').first.click() # Creating an allowed relationship: "First -> Second" self.browser.find_by_id('allowedRelations').first.click() self.browser.select('source', '1') self.browser.find_by_name('name').fill('FirstToSecond') self.browser.select('target', '2') self.browser.find_by_value('Save Type').first.click() # Creating a node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "First" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating a node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second1") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second2") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Creating another node of the "Second" type self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[1].click() self.browser.find_by_name('Name').first.fill("Second3") self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Editing the "First1" node self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption list']")[0].click() self.browser.find_by_xpath( "//td[@class='dataList']/a[@class='edit']").first.click() # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() # Adding the relationships 
self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[0].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 # Saving "First1" self.browser.find_by_value('Save First').first.click() # Creating another node of the "First" type with relationships self.browser.find_by_id('dataMenu').first.click() self.browser.find_by_xpath("//a[@class='dataOption new']")[0].click() self.browser.find_by_name('Name').first.fill("First3") # Adding more "FirstToSecond" relationship forms self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[1].click() self.browser.find_by_xpath( "//a[@class='addButton inFormsets']")[2].click() # Adding the relationships self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[1].fill('Second1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[2].fill('Second2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[3].fill('Second3') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[4].fill('First1') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() real_rels += 1 self.browser.find_by_xpath( "//li[@class='token-input-input-token']/input")[5].fill('First2') self.browser.is_element_present_by_id("id_user_wait", 3) self.browser.find_by_xpath( "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b" ).first.click() 
real_rels += 1 # Saving "First3" self.browser.find_by_xpath( "//span[@class='buttonLinkOption buttonLinkLeft']/input" ).first.click() real_nodes += 1 # Checking the counts self.browser.find_link_by_href('/graphs/bobs-graph/').first.click() nodes = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-nodes']" ).first.value rels = self.browser.find_by_xpath( "//div[@class='flags-block']/span[@class='graph-relationships']" ).first.value self.assertEqual(str(real_nodes) + " nodes", nodes) self.assertEqual(str(real_rels) + " relationships", rels)
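# A minimal sketch, not part of the original test suite: the relationship
# steps above repeat the same token-input interaction (fill the autocomplete
# box, wait for the dropdown, click the highlighted suggestion). The helper
# name `add_relationship` is hypothetical; the XPath selectors and the
# `id_user_wait` wait are the ones the tests already use.
def add_relationship(browser, index, target_name):
    """Fill the token-input box at position `index` and pick the suggestion."""
    browser.find_by_xpath(
        "//li[@class='token-input-input-token']/input")[index].fill(target_name)
    # Give the autocomplete widget time to populate its dropdown
    browser.is_element_present_by_id("id_user_wait", 3)
    browser.find_by_xpath(
        "//div[@class='token-input-dropdown']"
        "//li[@class='token-input-dropdown-item2"
        " token-input-selected-dropdown-item']/b"
    ).first.click()


# Example usage inside a test, assuming the edit form is already open:
#     add_relationship(self.browser, 0, 'First2')
#     real_rels += 1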
print("....the latest NASA news.....") print("Title: " + title) print("Title Paragraph: " + title_para) # # JPL Mars Space Images - Featured Image # # In[8]: # openning the url on chrome url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars" browser.visit(url) # In[9]: full_image = browser.find_by_id('full_image') full_image.click() # In[10]: browser.is_element_present_by_text('more info', wait_time=1) more_info = browser.find_link_by_partial_text('more info') more_info.click() # In[11]: html = browser.html image_soup = bs(html, 'html.parser') # In[12]:
def open_door():
    start_time = time.time()
    # `options` (username, password, door ids) and `parse_door_ids()` are
    # assumed to be defined elsewhere in the original script.
    browser = Browser('phantomjs')
    browser.visit("https://www.chalmersstudentbostader.se/login")
    browser.fill('log', options.username)
    browser.fill('pwd', options.password)
    # Wait for the "Logga in" ("Log in") button to render, then click it
    while browser.find_by_text('Logga in') == []:
        sleep(0.05)
    btn = browser.find_by_text('Logga in')[1]
    btn.click()
    # Poll until the "Öppna porten" ("Open the gate") link appears and its
    # href no longer contains the unresolved "{aptusUrl}" placeholder
    while True:
        while browser.find_by_text('Öppna porten') == []:
            sleep(0.05)
        port = browser.find_by_text('Öppna porten').first
        if not "{aptusUrl}" in port['href']:
            break
        sleep(0.1)
    browser.visit(port['href'])
    door_ids = parse_door_ids(options.door_ids)
    for door_id in door_ids:
        # Wait for each door button to appear, then click it
        while browser.find_by_id(door_id) == []:
            sleep(0.1)
        print("Opening door with id: " + door_id)
        btn = browser.find_by_id(door_id).first
        btn.click()
    print(time.time() - start_time)
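# A minimal sketch, not part of the original script: open_door() busy-waits in
# several places until an element shows up. The helper below factors that
# polling pattern out; the name `wait_for_elements` and the timeout value are
# assumptions for illustration only.
import time


def wait_for_elements(find, interval=0.05, timeout=30):
    """Poll `find()` until it returns a non-empty splinter element list.

    `find` is a zero-argument callable, e.g.
    `lambda: browser.find_by_text('Logga in')`.
    Raises RuntimeError if nothing shows up within `timeout` seconds.
    """
    deadline = time.time() + timeout
    while True:
        elements = find()
        if elements != []:
            return elements
        if time.time() > deadline:
            raise RuntimeError("Timed out waiting for element")
        time.sleep(interval)


# Example usage with the login button from open_door():
#     btn = wait_for_elements(lambda: browser.find_by_text('Logga in'))[1]
#     btn.click()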