Python getStopWords Examples

Programming Language: Python

Namespace/Package Name: stopwords

Method/Function: getStopWords

Examples at hotexamples.com: 3

Python getStopWords - 3 examples found. These are the top-rated real-world Python examples of stopwords.getStopWords, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def main():
    """Count word frequencies in ``ken.txt`` and append them to a result file.

    The text is lower-cased, stripped of non-letters and lemmatized, then
    filtered against three exclusion lists (common words, the CET4 list and
    the stopword list). Remaining tokens of at least three characters are
    counted, and every word seen more than once is appended to
    ``myResult10-3.txt`` as a ``word<TAB>count`` line.
    """
    # Load the exclusion lists and the lemma table.
    print("Loading Words to Remove...")
    common_text = getFileContent("5000_Most_Common_Words.txt", "r")
    common_words = getCommonWords(common_text)
    stop_words = stopwords.getStopWords()
    cet4_words = getCET4()

    lemma_list = getLemmaList(getFileContent("BNC_lemmafile5.txt", "r"))

    # Load the text to analyse and normalise it.
    print("Loading Words List...")
    text = replaceNonLetters(toLowerCase(getFileContent("ken.txt", "r")))

    print("Lemmalizing...")
    text = lemmatize(text, lemma_list)

    print("Removing stopwords...")
    for unwanted in (common_words, cet4_words, stop_words):
        text = removeCommonWords(text, unwanted)

    print("Counting Frequency...")
    # Tokens shorter than three characters are dropped before counting.
    frequencies = collections.Counter(
        token for token in text.split(" ") if len(token) >= 3
    )

    # Persist the result; the file is opened in append mode.
    print("Save to File...")
    with open("myResult10-3.txt", "a", encoding='utf-8') as out:
        for word, count in frequencies.items():
            # Only words that occur more than once are written.
            if count <= 1:
                continue
            out.write("\t".join((word, str(count))) + "\n")
    print("Success...")

Example #2

Show file

File: app.py Project: ajaybhadu/lingvito-language-engine-

#!/usr/bin/env python

import urllib, requests
from bottle import route, run, template, request, static_file
from triggers_json import dic
from elasticsearch import Elasticsearch
import stopwords
# NOTE: this rebinds the name `stopwords` from the imported module to the
# value returned by getStopWords(); the module itself is no longer reachable
# under that name after this line.
stopwords = stopwords.getStopWords()
# Keys of the `dic` mapping imported from triggers_json, printed at import
# time (Python 2 print statement).
trigger_list = dic.keys()
print trigger_list

# Elasticsearch client created with the constructor's default arguments.
es = Elasticsearch()


@route('/', method = "GET")
def home(name = None):
	"""Render the index template for GET requests on ``/``.

	The ``name`` argument is not read; the template's ``name`` variable is
	filled from the ``REMOTE_ADDR`` entry of the request environ instead.
	"""
	remote_addr = request.environ.get('REMOTE_ADDR')
	return template('template/index.html', name=remote_addr)

@route('/<query>', method="GET")
def index(query=""):
	
	filtered_query = [i for i in query.split() if i not in stopwords]	##filtering out stopwpords
	
	print query

	template = [dic[x] for x in query if x in dic.keys()]
	print template

	query={
   "size":1,
   "query": {

Example #3

Show file

File: sentencedownload.py Project: c4fcm/MediaCloud-quarterback-investigation

from ConfigParser import SafeConfigParser

# NOTE(review): not referenced in the visible part of the script; presumably
# the page size used for API queries further down -- confirm.
ROWS_PER_QUERY = 500000

# Load config data
# Reads the MY_API_KEY option from the [API] section of config.txt.
parser = SafeConfigParser()
parser.read('config.txt')
MY_API_KEY = parser.get('API','MY_API_KEY')
mc = mediacloud.api.AdminMediaCloud(MY_API_KEY) #AdminMediaCloud, rather than MediaCloud

# Configure the root logger at DEBUG level so the debug() calls below are emitted.
logging.basicConfig(level=logging.DEBUG)
logging.info("-----------------------------------------------------------------")
logging.info("Starting QB data gathering")

# build stopwords
# Start from the shared stopword list (lower-cased), then add every word that
# appears in the first two columns of qb-table.csv.
my_stopwords = [word.lower() for word in stopwords.getStopWords()]
team_stopwords = []
qb_stopwords = []
# The context manager closes the CSV file once all rows have been read.
with codecs.open('qb-table.csv', 'r', 'utf-8') as qb_file:
    qb_table = csv.reader(qb_file)
    # Skip the first row (presumably a header -- confirm against the CSV).
    # next() works on Python 2.6+ and 3, unlike the .next() method.
    next(qb_table)
    for row in qb_table:
        # Column 0 feeds team_stopwords, column 1 feeds qb_stopwords; each
        # cell is split on whitespace and lower-cased.
        team_stopwords.extend(word.lower() for word in row[0].split())
        qb_stopwords.extend(word.lower() for word in row[1].split())
# Pass the lists as logging arguments so formatting is deferred to the logger.
logging.debug(" Added qb names to stopwords: %s", qb_stopwords)
logging.debug(" Added team names to stopwords: %s", team_stopwords)
my_stopwords = my_stopwords + qb_stopwords + team_stopwords

# load media sources
# NOTE(review): the handle `m` is not closed in the visible part of the script.
m = codecs.open('sources.csv','r','utf-8')
media_reader = csv.reader(m)
# Take the second column of every row, then drop the first entry
# (presumably the header row -- confirm against sources.csv).
media = [x[1] for x in media_reader][1:]