Python PunktWordTokenizer.strip Examples

Programming Language: Python

Namespace/Package Name: nltk.tokenize.punkt

Method/Function: strip

Examples at hotexamples.com: 2

Python PunktWordTokenizer.strip - 2 examples found. These are the top-rated real-world Python examples of nltk.tokenize.punkt.PunktWordTokenizer.strip extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

PunktWordTokenizer(30)

tokenize(5)

strip(2)

append(1)

index(1)

remove(1)

Example #1

Show file

File: newsSummarizer.py Project: Anhmike/NewsSummarizer

def usingTitleAlgorithm(userinput):
    """Summarize a web page by ranking its sentences against its title.

    The page is fetched with ``web_crawler``, the title is tokenized and
    filtered through a stopword list to form a query, every sentence of the
    page text is tokenized into words, and ``summarizer`` is asked to rank
    the sentences against the query.

    Args:
        userinput: The page URL; it is converted with ``str()`` before use.

    Returns:
        A list holding up to three sentences (each re-joined from its word
        tokens with single spaces), taken from rank positions 1-3, followed
        by the page title as the last element. Fewer sentences are returned
        when the ranking has fewer than four entries.
    """
    # nltk's english stopword list; a frozenset gives O(1) membership tests.
    stop = frozenset([
        'i', 'me', 'my', 'myself', 'we', 'our', 'ours',
        'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves',
        'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its',
        'itself', 'they', 'them', 'their', 'theirs', 'themselves',
        'what', 'which', 'who', 'whom', 'this', 'that', 'these',
        'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a',
        'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until',
        'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between',
        'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to',
        'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under',
        'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where',
        'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such',
        'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'can', 'will', 'just', 'don', 'should', 'now',
    ])
    url = str(userinput)
    title, htmlText = web_crawler(url)

    # One word tokenizer is reused for the title and for every sentence.
    word_tokenizer = PunktWordTokenizer()

    # The query is the tokenized title with stopwords removed (case-insensitive).
    qry = [word for word in word_tokenizer.tokenize(title) if word.lower() not in stop]

    # Split the page into sentences, then each sentence into word tokens.
    # Leading/trailing periods are stripped before word tokenization.
    totalText = PunktSentenceTokenizer().tokenize(htmlText)
    textList = [word_tokenizer.tokenize(sentence.strip('.')) for sentence in totalText]

    sentenceRanks = summarizer(qry, textList)

    finalResults = []
    # Entry 0 is skipped: per the original author it is expected to be the
    # title itself. The upper bound is clamped so that a page with fewer than
    # four ranked sentences no longer raises IndexError.
    for num in range(1, min(4, len(sentenceRanks))):
        ind = sentenceRanks[num][0]  # first field of a rank entry indexes textList
        finalResults.append(' '.join(textList[ind]))
    finalResults.append(title)
    return finalResults

Example #2

Show file

File: newsSummarizer.py Project: Anhmike/NewsSummarizer

def usingSentenceIntersectionAlgorithm(userinput):
    """Summarize a web page using the scores produced by ``sentence_ranks``.

    The page is fetched with ``web_crawler``, its text is split into
    sentences, each sentence is tokenized into words, and the resulting
    token lists are scored by ``sentence_ranks``. The scored items are
    ordered from highest to lowest score.

    Args:
        userinput: The page URL, passed to ``web_crawler`` unchanged.

    Returns:
        A list holding the keys of up to three scored items (positions 1-3
        of the descending ordering), followed by the page title as the last
        element. Fewer items are returned when fewer than four were scored.
    """
    url = userinput
    title, htmlText = web_crawler(url)

    # Split the page into sentences, then each sentence into word tokens.
    # Leading/trailing periods are stripped before word tokenization.
    word_tokenizer = PunktWordTokenizer()
    totalText = PunktSentenceTokenizer().tokenize(htmlText)
    textList = [word_tokenizer.tokenize(sentence.strip('.')) for sentence in totalText]

    ranks = sentence_ranks(textList)
    # Ascending sort followed by an in-place reverse is kept on purpose:
    # sorted(..., reverse=True) would order equal scores differently.
    ranks_sorted = sorted(ranks.items(), key=operator.itemgetter(1))
    ranks_sorted.reverse()

    # Position 0 is skipped (the original author expected it to be the
    # title). Slicing instead of indexing 1..3 avoids an IndexError when
    # fewer than four items were ranked.
    finalResults = [entry[0] for entry in ranks_sorted[1:4]]
    finalResults.append(title)

    return finalResults