Python removeStopList Examples

Programming Language: Python

Namespace/Package Name: stoplist

Method/Function: removeStopList

Examples at hotexamples.com: 4

Python removeStopList - 4 examples found. These are the top-rated real-world Python examples of stoplist.removeStopList, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: tweets2GenderSvmlight.py Project: pyongjoo/twitter-research

def loadContext(user_id):
    """Build a normalized word-frequency context for one user.

    The words returned by ``loadWordsArr(user_id)`` are passed through
    ``removeStopList`` (presumably dropping stop words -- confirm against
    the ``stoplist`` module), counted, and each count is divided by the
    number of words that remain after that call.

    Args:
        user_id: Identifier forwarded unchanged to ``loadWordsArr``.

    Returns:
        A list of ``(word, relative_frequency)`` tuples ordered from the
        most common word to the least common. Empty when no words remain.
    """
    words_arr = removeStopList(loadWordsArr(user_id))
    # float() keeps the division a true division on Python 2 as well.
    total = float(len(words_arr))
    # Return a real list rather than ``map(...)``: on Python 3 ``map`` yields
    # a lazy, single-use iterator, so callers could not index it, take its
    # length or iterate it twice.
    return [(word, count / total)
            for word, count in Counter(words_arr).most_common()]

Example #2

Show file

File: text2feature.py Project: pyongjoo/twitter-research

    def vectorize(self, text):
        """Turn raw text into a list of normalized token frequencies.

        Lines containing a two-digit "NN years old" phrase are discarded
        before tokenizing. The remaining text is split into ``\\w+`` tokens,
        passed through ``removeStopList`` (presumably dropping stop words --
        confirm against the ``stoplist`` module), and every token count is
        divided by the number of tokens that remain.

        Args:
            text: Newline-separated input text.

        Returns:
            A list of ``(token, relative_frequency)`` tuples ordered from the
            most common token to the least common. Empty when no tokens
            remain.
        """
        # better filter maybe
        #texts = filter(lambda t: not re.search('year', t), texts)
        #texts = filter(lambda t: not re.search('old', t), texts)
        kept_lines = [line for line in text.split('\n')
                      if not re.search(r'\d{2} years old', line)]

        text = '\n'.join(kept_lines)

        # Replace Twitter specific patterns.
        #text = re.sub(r'https?:\S+', ' ', text)
        #text = re.sub(r'\b\d{2} years old', ' ', text)
        #text = re.sub(r'old', ' ', text)
        #text = re.sub(r'years', ' ', text)
        #text = re.sub(r'\d+', ' ', text)
        #text = re.sub(r'#\w+', ' ', text)
        #text = re.sub(r'@\w+', ' ', text)

        # tokenize and remove stoplist
        words_arr = removeStopList(re.findall(r'\w+', text))

        # normalize; float() keeps this a true division on Python 2 as well.
        total = float(len(words_arr))

        # Return a real list rather than ``map(...)``: on Python 3 ``map``
        # yields a lazy, single-use iterator that cannot be indexed, measured
        # or iterated twice.
        return [(token, count / total)
                for token, count in Counter(words_arr).most_common()]

Example #3

Show file

File: contextReader.py Project: pyongjoo/twitter-research

def contextFromText(text):
    """Build a normalized token-frequency context from a text string.

    The text is first passed through ``replaceTWPattern`` (judging by the
    name, it rewrites Twitter-specific patterns -- confirm against its
    definition), then split into ``\\w+`` tokens and passed through
    ``removeStopList`` (presumably dropping stop words -- confirm against
    the ``stoplist`` module). Each token count is divided by the number of
    tokens that remain.

    Args:
        text: Input text to analyse.

    Returns:
        A list of ``(token, relative_frequency)`` tuples ordered from the
        most common token to the least common. Empty when no tokens remain.
    """
    cleaned = replaceTWPattern(text)

    words_arr = removeStopList(re.findall(r'\w+', cleaned))
    # float() keeps the division a true division on Python 2 as well.
    total = float(len(words_arr))

    # Return a real list rather than ``map(...)``: on Python 3 ``map`` yields
    # a lazy, single-use iterator that cannot be indexed, measured or
    # iterated twice.
    return [(token, count / total)
            for token, count in Counter(words_arr).most_common()]

Example #4

Show file

File: text2feature_gender.py Project: pyongjoo/twitter-research

    def vectorize(self, text):
        """Turn raw text into a list of normalized token frequencies.

        Lines containing a self-description such as "I am a man" or
        "I'm a woman" (matched case-insensitively) are discarded before
        tokenizing. The remaining text is split into ``\\w+`` tokens, passed
        through ``removeStopList`` (presumably dropping stop words -- confirm
        against the ``stoplist`` module), and every token count is divided by
        the number of tokens that remain.

        Args:
            text: Newline-separated input text.

        Returns:
            A list of ``(token, relative_frequency)`` tuples ordered from the
            most common token to the least common. Empty when no tokens
            remain.
        """
        # better filter maybe
        kept_lines = [
            line for line in text.split("\n")
            if not re.search(r"\b(I am|I'm|Im) a (man|woman)\b", line, re.I)
        ]

        text = "\n".join(kept_lines)

        # tokenize and remove stoplist
        words_arr = removeStopList(re.findall(r"\w+", text))

        # normalize; float() keeps this a true division on Python 2 as well.
        total = float(len(words_arr))

        # Return a real list rather than ``map(...)``: on Python 3 ``map``
        # yields a lazy, single-use iterator that cannot be indexed, measured
        # or iterated twice.
        return [(token, count / total)
                for token, count in Counter(words_arr).most_common()]