import re

from bs4 import BeautifulSoup
from nltk.corpus import stopwords


def review_to_wordlist(review, remove_stopwords=False):
    # Convert a document to a sequence of words, optionally removing
    # stop words.  Returns a list of words.
    #
    # 1. Remove HTML
    review_text = BeautifulSoup(review, "html.parser").get_text()
    #
    # 2. Remove non-letters
    review_text = re.sub("[^a-zA-Z]", " ", review_text)
    #
    # 3. Convert words to lower case and split them
    words = review_text.lower().split()
    #
    # 4. Optionally remove stop words (off by default)
    if remove_stopwords:
        stops = set(stopwords.words("english"))
        words = [w for w in words if w not in stops]
    # Optional, disabled step: POS-tag the words and drop singular nouns, e.g.
    #   tagged = nltk.pos_tag(words)
    #   words = [w for w, tag in tagged if tag != 'NN']
    #
    # 5. Return a list of words
    return words
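# A minimal usage sketch; the sample review string and variable name are
# hypothetical, not taken from the original source.
sample_review = "<p>This movie was <b>great</b>, I loved it!</p>"
print(review_to_wordlist(sample_review))
# ['this', 'movie', 'was', 'great', 'i', 'loved', 'it']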
def paragraph_to_wordlist(raw_review):
    # Clean a raw paragraph and return a list of lower-case words.
    #
    # Remove HTML tags using the BeautifulSoup API
    review_text = BeautifulSoup(raw_review, "html.parser").get_text()
    #
    # Remove non-alphabetic characters
    review_text = re.sub("[^a-zA-Z]", " ", review_text)
    #
    # Convert to consistent lower case and split into words
    words = review_text.lower().split()
    return words
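# Example call with a hypothetical paragraph; unlike review_to_wordlist above,
# this helper never removes stop words.
print(paragraph_to_wordlist("The <i>quick</i> brown fox, 2 times!"))
# ['the', 'quick', 'brown', 'fox', 'times']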
    def generate_snippet(self, url):
        # Build a short snippet around each non-stopword query term found on
        # the page at `url`.  Requires urllib2 (Python 2) and BeautifulSoup.
        snippet_list = []
        req = urllib2.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0')
        try:
            page = urllib2.urlopen(req)
            code = page.getcode()
            text = BeautifulSoup(page.read(), "html.parser").body.get_text()
            for s in self.query:
                if s not in self.stopwords:
                    ind = text.lower().find(s)
                    # Skip terms that are not found (find returns -1) or that
                    # start at an offset already covered by a snippet
                    if ind != -1 and ind not in snippet_list:
                        self.snippet += text[max(ind - 30, 0):min(ind + 30, len(text) - 1)].strip()
                        self.snippet += u"... "
                        snippet_list.append(ind)
            return (self.snippet, code)

        except urllib2.HTTPError as err:
            return (None, err.code)
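# A minimal wrapper sketch (assumed; the enclosing class is not part of this
# excerpt).  It only illustrates the attributes generate_snippet relies on; the
# class name, query terms, and stopword set below are hypothetical.
class SnippetBuilder(object):
    def __init__(self, query, stopwords):
        self.query = query          # lower-case query terms to look for
        self.stopwords = stopwords  # terms skipped when building snippets
        self.snippet = u""          # accumulated snippet text
    # The generate_snippet method above would sit here in the class body.

# Hypothetical call:
# builder = SnippetBuilder(["python", "snippets"], {"the", "a", "of"})
# snippet, status = builder.generate_snippet("http://example.com/")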
def review_to_wordlist(raw_review, remove_stopwords=False):
    # Function to convert a document to a sequence of words,
    # optionally removing stop words.  Returns a list of words.
    #
    # 1. Remove HTML
    review_text = BeautifulSoup(raw_review, "html.parser").get_text()
    #
    # 2. Remove non-letters
    review_text = re.sub("[^a-zA-Z]", " ", review_text)
    #
    # 3. Convert words to lower case and split them
    words = review_text.lower().split()
    #
    # 4. Optionally remove stop words (false by default)
    if remove_stopwords:
        stops = set(stopwords.words("english"))
        words = [w for w in words if w not in stops]
    #
    # 5. Return a list of words
    return words
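# Hedged usage sketch (the review text below is made up for illustration).
# The stop-word filter assumes the NLTK corpus is available, e.g. after
# running nltk.download("stopwords") once.
print(review_to_wordlist("<p>Not a bad film at all.</p>", remove_stopwords=True))
# ['bad', 'film']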