Python BeautifulSoup.title Examples

Programming Language: Python

Namespace/Package Name: BeautifulSoup

Class/Type: BeautifulSoup

Method/Function: title

Examples at hotexamples.com: 4

Python BeautifulSoup.title - 4 examples found. These are the top-rated real-world Python examples of BeautifulSoup.BeautifulSoup.title extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

BeautifulSoup(30)

decompose(30)

first(30)

find_all(30)

findAll(30)

find(30)

fetch(30)

feed(30)

getText(29)

insert(20)

findChildren(19)

body(12)

close(11)

__str__(11)

encode(8)

new_tag(6)

findChild(5)

append(4)

prettify(4)

findSelect(4)

decode(4)

get(4)

__unicode__(3)

goahead(3)

lower(3)

div(3)

findall(3)

pretify(3)

__init__(3)

firstText(2)

pop(2)

data(2)

findNext(2)

read(2)

index(1)

html(1)

query(1)

json(1)

load(1)

re_left(1)

noscript(1)

orig_url(1)

partition(1)

popTag(1)

pretiffy(1)

head(1)

findNextSiblings(1)

group(1)

encodeContents(1)

attrs(1)

Example #1

Show file

File: index_update.py Project: AugustLONG/celery-crawler

    def handle(self, **options):
        since = get_last_change()
        writer = get_writer()
        try:
            while True:
                changes = settings.db.changes(since=since)
                since = changes["last_seq"]
                for changeset in changes["results"]:
                    try:
                        doc = settings.db[changeset["id"]]
                    except couchdb.http.ResourceNotFound:
                        continue
                    if "type" in doc and doc["type"] == "page":
                        print "indexing", doc["url"]
                        soup = BeautifulSoup(doc["content"])
                        if soup.body is None:
                            continue

                        desc = soup.findAll('meta', attrs={ "name": desc_re })

                        writer.update_document(
                                title=unicode(soup.title(text=True)[0]) if soup.title is not None and len(soup.title(text=True)) > 0 else doc["url"],
                                url=unicode(doc["url"]),
                                desc=unicode(desc[0]["content"]) if len(desc) > 0 and desc[0]["content"] is not None else u"",
                                rank=doc["rank"],
                                content=unicode(soup.title(text=True)[0] + "\n" + doc["url"] + "\n" + "".join(soup.body(text=True)))
                            )

                    writer.commit()
                    writer = get_writer()

                set_last_change(since)
        finally:
            set_last_change(since)

Example #2

Show file

    def handle(self, **options):
        since = get_last_change()
        writer = get_writer()
        try:
            while True:
                changes = settings.db.changes(since=since)
                since = changes["last_seq"]
                for changeset in changes["results"]:
                    try:
                        doc = settings.db[changeset["id"]]
                    except couchdb.http.ResourceNotFound:
                        continue
                    if "type" in doc and doc["type"] == "page":
                        print "indexing", doc["url"]
                        soup = BeautifulSoup(doc["content"])
                        if soup.body is None:
                            continue

                        desc = soup.findAll('meta', attrs={"name": desc_re})

                        writer.update_document(
                            title=unicode(soup.title(
                                text=True)[0]) if soup.title is not None
                            and len(soup.title(text=True)) > 0 else doc["url"],
                            url=unicode(doc["url"]),
                            desc=unicode(desc[0]["content"]) if len(desc) > 0
                            and desc[0]["content"] is not None else u"",
                            rank=doc["rank"],
                            content=unicode(
                                soup.title(text=True)[0] + "\n" + doc["url"] +
                                "\n" + "".join(soup.body(text=True))))

                    writer.commit()
                    writer = get_writer()

                set_last_change(since)
        finally:
            set_last_change(since)

Example #3

Show file

File: nist-attenuation-scraper.py Project: mwj87/GlobalDiagnostiX

Energy = []
Mu = []
Muen = []
table = soup.find('table')
for row in table.findAll('tr'):
    col = row.findAll('td')
    if len(str(col).split()) == 3:
        Energy.append(col[0].find(text=True))
        Mu.append(col[1].find(text=True))
        Muen.append(col[2].find(text=True))
print col[1]

plt.loglog(Energy, Mu, label='Mu')
plt.loglog(Energy, Muen, label='Muen')
plt.title(soup.title(text=True))
plt.legend()
plt.show()

# Fetch the page at URL and parse the returned HTML.
URL = 'http://physics.nist.gov/PhysRefData/XrayMassCoef/ComTab/bone.html'
response = urllib2.urlopen(URL)
html = response.read()
soup = BeautifulSoup(html)

# Start with empty column lists before walking the rows of the first table.
Energy = []
Mu = []
Muen = []
table = soup.find('table')
for row in table.findAll('tr'):
    col = row.findAll('td')
    # NOTE(review): the excerpt stops at this condition; its body is not
    # visible here.
    if len(str(col).split()) == 3:

Example #4

Show file

File: nist-attenuation-scraper.py Project: habi/GlobalDiagnostiX

Energy = []
Mu = []
Muen = []
table = soup.find('table')
for row in table.findAll('tr'):
    col = row.findAll('td')
    if len(str(col).split()) == 3:
        Energy.append(col[0].find(text=True))
        Mu.append(col[1].find(text=True))
        Muen.append(col[2].find(text=True))
print col[1]

plt.loglog(Energy, Mu, label='Mu')
plt.loglog(Energy, Muen, label='Muen')
plt.title(soup.title(text=True))
plt.legend()
plt.show()

# Download the page at URL and parse the returned HTML.
URL = 'http://physics.nist.gov/PhysRefData/XrayMassCoef/ComTab/bone.html'
response = urllib2.urlopen(URL)
html = response.read()
soup = BeautifulSoup(html)

# Empty the column lists before iterating over the rows of the first table.
Energy = []
Mu = []
Muen = []
table = soup.find('table')
for row in table.findAll('tr'):
    col = row.findAll('td')
    # NOTE(review): the excerpt is cut off after this condition; the body of
    # the `if` is not visible here.
    if len(str(col).split()) == 3: