Python BeautifulSoup.findAllNext Examples

Programming Language: Python

Namespace/Package Name: bs4

Class/Type: BeautifulSoup

Method/Function: findAllNext

Examples at hotexamples.com: 2

Python BeautifulSoup.findAllNext - 2 examples found. These are the top rated real world Python examples of bs4.BeautifulSoup.findAllNext extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

append(30)

BeautifulSoup(30)

__str__(30)

__init__(11)

attrs(10)

__len__(8)

__repr__(3)

__unicode__(2)

article(2)

__copy__(2)

__getattr__(2)

first(2)

findAllNext(2)

feed(1)

currentTag(1)

fartind(1)

BF(1)

filter_wikilinks(1)

fina_all(1)

fnd_all(1)

h1(1)

replace_with(1)

td(1)

toCSV(1)

copy(1)

alcohol(1)

astype(1)

assign(1)

apply(1)

add_structure(1)

add_shared_term(1)

a(1)

_title(1)

_repr_html_(1)

_find_all(1)

_all_strings(1)

__getitem__(1)

__contains__(1)

NavigableString(1)

Date(1)

wrap(1)

Example #1

Show file

File: test.py Project: eeandy79/sheetmusic

           ]

# Walk every index page in url_list, follow each composer link found after the
# page's <h1>, and store one (composer, pdf_url) row in the "music" table for
# every violin PDF linked from that composer's page.
#
# NOTE(review): url_list, hdr and con are defined outside this fragment;
# con is presumably an open sqlite3 connection -- confirm against the caller.
for url in url_list:
	# Parenthesised single-argument print emits the same text under Python 2.
	print(url)
	req = urllib2.Request(url, headers=hdr)

	try:
		page = urllib2.urlopen(req)
	except urllib2.HTTPError:
		# Skip index pages the server refuses to serve.
		continue

	# foundtext is the page heading; the composer links for one letter of the
	# alphabet are searched for after it.
	foundtext = BeautifulSoup(page.read(), 'html5lib').find('h1')
	if foundtext is None:
		# find() returns None when the page has no <h1>; nothing to scan.
		continue

	for link in foundtext.findAllNext('a'):
		# Tag.get() returns None for anchors without an href, and an anchor may
		# have no children at all; both would otherwise abort the whole run.
		composer_href = link.get('href')
		if not composer_href or "classical" not in composer_href:
			continue
		if not link.contents:
			continue
		composer_name = link.contents[0]

		# Open the page that lists all pieces by this composer.
		req = urllib2.Request(composer_href, headers=hdr)
		try:
			page = urllib2.urlopen(req)
		except urllib2.HTTPError:
			# Same policy as for index pages: skip what cannot be fetched.
			continue
		foundtext2 = BeautifulSoup(page.read(), 'html5lib').find('h1')
		if foundtext2 is None:
			continue

		for link2 in foundtext2.findAllNext('a'):
			pdf_url_value = link2.get('href')
			if not pdf_url_value or "violin.pdf" not in pdf_url_value:
				continue

			values = {'composer': composer_name, 'pdf_url': pdf_url_value}
			# Named placeholders keep the scraped text out of the SQL string.
			con.execute("insert into music VALUES (:composer, :pdf_url)", values)

Example #2

Show file

File: test.py Project: eeandy79/sheetmusic

import sys, urllib, urllib2, cookielib
from bs4 import BeautifulSoup 
import re
import sqlite3

# Read a saved composer page from disk and list the PDF links that follow its
# <h1>: for links whose first child mentions "violin" print the file name and
# the full URL; for other PDFs (except "score, piano") print label + URL.
with open("bach.html", "r") as myfile:
	data = myfile.read()

foundtext = BeautifulSoup(data, 'html5lib').find('h1')

# find() returns None when the page has no <h1>; scan nothing in that case
# instead of failing on the attribute lookup.
anchors = foundtext.findAllNext('a') if foundtext is not None else []

for urls in anchors:
	# Tag.get() returns None for anchors without an href, and an anchor may
	# have no children; skip both rather than raising mid-scan.
	url_value = urls.get('href')
	if not url_value or ".pdf" not in url_value or not urls.contents:
		continue

	label = urls.contents[0]
	if "violin" in label:
		split_result = url_value.split('/')
		# Last path component is the PDF file name.
		print(split_result[-1])
		print(url_value)
	elif "score, piano" not in label:
		print(label + url_value)
			

#for link2 in foundtext2.findAllNext('a'):
#	if "violin.pdf" in link2.get('href'):
#		pdf_url_value = link2.get('href')