Example #1
def featured_image(browser):
    # Visit URL
    url = 'https://spaceimages-mars.com'
    browser.visit(url)

    # Find and click the full image button
    full_image_elem = browser.find_by_tag('button')[1]
    full_image_elem.click()

    # Parse the resulting html with soup
    html = browser.html
    img_soup = soup(html, 'html.parser')

    # Add try/except for error handling
    try:
        # Find the relative image url
        img_url_rel = img_soup.find('img', class_='fancybox-image').get('src')

    except AttributeError:
        return None

    # Use the base URL to create an absolute URL
    img_url = f'https://spaceimages-mars.com/{img_url_rel}'

    return img_url
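
A minimal usage sketch for the function above (not part of the original example): it assumes BeautifulSoup is imported as soup, as the call to soup() inside the function implies, and that a local chromedriver is available for Splinter.

from splinter import Browser
from bs4 import BeautifulSoup as soup  # featured_image() calls soup(html, 'html.parser')

browser = Browser('chrome')            # assumed driver; any splinter-supported driver works
print(featured_image(browser))         # absolute image URL, or None if the element is missing
browser.quit()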
Example #2
def autoSel(classID):
    # Click successive <option>s down a chain of elements whose ids follow the
    # pattern classID, classID_ktzy2, classID_ktzy3, ..., then click the first
    # <img> on the page to submit.
    global browser
    select = browser.find_by_id(classID)[0]
    option_num = len(select.find_by_tag('option')) - 1

    for index in range(option_num):
        select.find_by_tag('option')[index + 1].click()
        nextID = classID + '_ktzy' + str(index + 2)
        if index != option_num - 1:
            select = browser.find_by_id(nextID)[0]
    submit = browser.find_by_tag('img')[0]
    submit.click()
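
A hedged usage sketch for autoSel(): the element id below is purely hypothetical, and the function expects a module-level splinter browser that is already logged in and sitting on the page whose elements follow the '<id>_ktzy<N>' id pattern.

from splinter import Browser

browser = Browser('chrome')   # assumed driver
# ... log in and navigate to the selection page first ...
autoSel('someSelectId')       # hypothetical id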
Example #3
def hemispheres(browser):
    # 1. Use browser to visit the URL
    url = 'https://marshemispheres.com/'
    browser.visit(url)

    # 2. Create a list to hold the images and titles.
    hemisphere_image_urls = []

    # 3. Write code to retrieve the image urls and titles for each hemisphere.
    # Initialize for loop to loop through each hemisphere link
    for link in range(4):

        # Empty dictionary to hold image URLs and titles
        hemispheres = {}

        # Find and click on each hemisphere link
        hemisphere_link = browser.find_by_tag('a.itemLink h3')[link]
        hemisphere_link.click()

        # Navigate to full resolution image and retrieve the full resolution image URL
        image = browser.links.find_by_text('Sample')
        img_url = image['href']

        # Retrieve title for hemisphere image
        title = browser.find_by_tag('h2').text

        # Add hemisphere image URL and title to dictionary
        hemispheres["img_url"] = img_url
        hemispheres["title"] = title

        # Add dictionary to list
        hemisphere_image_urls.append(hemispheres)

        # Navigate back to the beginning to get the next hemisphere image
        browser.back()

    return hemisphere_image_urls
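
A minimal usage sketch for hemispheres() (not from the original source); the driver choice is an assumption.

from splinter import Browser

browser = Browser('chrome')   # assumes a local chromedriver on PATH
for hemi in hemispheres(browser):
    print(hemi['title'], hemi['img_url'])
browser.quit()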
Example #4
def autoRate():
    global browser
    images = browser.find_by_tag('img')[:]
    for i in range(len(images)):
        print(i)
        # Skip courses that have already been rated
        try:
            browser.find_by_tag('img')[i].click()
            checks = browser.find_by_value('10_1')
            if len(checks) == 0:
                browser.back()
                print('already rated, going back!')
                continue
            for check in checks:
                check.click()
            # Fill in the comment box and submit the rating
            words = u'老师人很好!'  # "The teacher is very nice!"
            browser.find_by_tag('textarea').first.fill(words)
            submit = browser.find_by_tag('img')[0]
            submit.click()
            time.sleep(0.2)
            browser.get_alert().accept()
        except Exception:
            pass
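
autoRate() relies on module-level state that the excerpt does not show. A sketch of the assumed setup; the driver choice and navigation steps are assumptions:

import time
from splinter import Browser

browser = Browser('chrome')   # assumed driver
# ... log in and open the course-rating list first ...
autoRate()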
Example #5
        bg.save(r'errorReport/' + v_yzm + r'_s1.jpg')
        return ''


try_time = 0

while try_time < 10:
    browser.visit(loginpage)
    browser.fill('zjh', student_id)
    browser.fill('mm', passWord)
    v_yzm = ''
    yzm_trytime = 0

    while len(v_yzm) != 4 and yzm_trytime < 3:
        if yzm_trytime != 0:
            browser.find_by_tag('a').first.click()
            time.sleep(0.1)
        ran_code = random.random()
        browser.screenshot('E:\\Python Sourse File\\temp\\urp_' + str(ran_code))
        v_yzm = getCode(ran_code)
        yzm_trytime += 1
        error_time += 1

    browser.fill('v_yzm', v_yzm)
    browser.find_by_id('btnSure').first.click()
    time.sleep(0.2)
    test = browser.find_by_text(u'验证码')
    if len(test) == 0:
        break
    # Save captcha images that were recognized incorrectly
    else:
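
The excerpt above depends on module-level names defined elsewhere in the script. A sketch of the assumed setup; everything here, including the driver choice, is an assumption:

import time
import random
from splinter import Browser

browser = Browser('chrome')   # assumed driver
error_time = 0
# The loop also expects: loginpage, student_id, passWord, and a getCode(ran_code)
# helper that OCRs the captcha screenshot saved above and returns the recognized
# 4-character string ('' on failure, as the fragment at the top suggests).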
Example #7
# ... each of Mars's hemispheres
usgs_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(usgs_url)

html = browser.html
soup = bs(html, 'html.parser')
mars_hemis = []

# loop through the four tags and load the data to the dictionary

for i in range(4):
    time.sleep(5)
    images = browser.find_by_tag('h3')
    images[i].click()
    html = browser.html
    soup = bs(html, 'html.parser')
    partial = soup.find("img", class_="wide-image")["src"]
    img_title = soup.find("h2", class_="title").text
    img_url = 'https://astrogeology.usgs.gov' + partial
    dictionary = {"title": img_title, "img_url": img_url}
    mars_hemis.append(dictionary)
    browser.back()

print(mars_hemis)
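
The cells above are an excerpt and rely on objects created earlier in the notebook. A minimal sketch of the assumed setup (the driver choice is an assumption):

import time
from splinter import Browser
from bs4 import BeautifulSoup as bs

browser = Browser('chrome')   # assumes a local chromedriver on PATH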
Example #8
def scrape():
    #Create Dictionary for Mongo
    mars_info = {}
    
    #Mars News
    browser = init_browser()
    nasa_url = 'https://mars.nasa.gov/news/'
    browser.visit(nasa_url)
    html = browser.html
    soup = bs(html, 'html.parser')
                
    #Scrape for the most recent article, then store it in variables
    latest_article = soup.find("div", "list_text")
    news_title = latest_article.find("div", class_="content_title").text
    news_p = latest_article.find("div", class_="article_teaser_body").text

    #Add Dictionary
    mars_info["news_title"] = news_title
    mars_info["teaser"] = news_p

    # Space Image Site
    jpl_url = "https://jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(jpl_url)

    # JPL Mars Scrape for requested image
    html = browser.html
    soup = bs(html, 'html.parser')
    carousel = soup.find('div', class_= 'carousel_items')
    div_style = carousel.find('article')['style']
    style = cssutils.parseStyle(div_style)
    partial_url = style['background-image']
    

    # Cleaning up the image URL - per the Learning Assistant's recommendation
    partial_url = partial_url.replace('url(', '').replace(')', '')
    featured_image_url = "https://jpl.nasa.gov" + partial_url
    #print(featured_image_url)

    # Adding to dictionary - per the Learning Assistant, this needs to be done for the images as well
    mars_info["featured_image_url"] = featured_image_url

    # Twitter Navigation for Information
    tweet_url = "https://twitter.com/marswxreport?lang=en"
    browser.visit(tweet_url)
    
    # Most Recent Tweet for Weather
    html = browser.html
    soup = bs(html, 'html.parser')
    mars_weather = soup.find("p", class_="tweet-text").text
    print(mars_weather)

    # Adding to dictionary again
    mars_info["mars_weather"] = mars_weather

    # Mars Fact Site Navigation
    facts_url = "https://space-facts.com/mars/"
    browser.visit(facts_url)

    # Using Panda for Scrape
    facts = pd.read_html(facts_url)
    
    # DataFrame List for Specific Information
    facts_df = pd.DataFrame(facts[0])
    facts_df.columns=['Fact','Result']
    
    # DataFrame utilized for HTML
    mars_table = facts_df.to_html(index=False, justify='left', classes='mars-table')
    mars_table = mars_table.replace('\n', ' ')
    
    # Adding to dictionary - again
    mars_info["mars_table"] = mars_table

    # Going to Site for Image
    hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(hemi_url)

    # Loop to scrape image info with time delay to account for browser navigation
    hemisphere_image_urls = []

    for i in range(4):
        time.sleep(5)
        images = browser.find_by_tag('h3')
        images[i].click()
        html = browser.html
        soup = bs(html, 'html.parser')
        partial_url = soup.find("img", class_="wide-image")["src"]
        image_title = soup.find("h2", class_="title").text
        image_url = 'https://astrogeology.usgs.gov' + partial_url
        image_dict = {"title": image_title, "image_url": image_url}
        hemisphere_image_urls.append(image_dict)
        browser.back()
   
    # Adding to dictionary again
    mars_info["hemispheres"] = hemisphere_image_urls

    # Quit the browser - per the Learning Assistant's recommendation
    browser.quit()

    # Assumed: return the scraped results so the caller can insert them into Mongo
    return mars_info
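
scrape() is shown without its imports or the init_browser() helper it calls. A minimal sketch of what the excerpt appears to assume; the helper body is hypothetical:

import time
import pandas as pd
import cssutils
from bs4 import BeautifulSoup as bs
from splinter import Browser


def init_browser():
    # Hypothetical helper: return a splinter Browser for scrape() to drive.
    return Browser('chrome')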