Python stem_word Examples

Programming Language: Python

Namespace/Package Name: preprocess

Method/Function: stem_word

Examples at hotexamples.com: 5

Python stem_word - 5 examples found. These are the top-rated real-world Python examples of preprocess.stem_word, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

0

Show file

File: tweets_feature_extractor.py Project: kstrauch94/HPVTweets

def process_subjectivity_file(filename, stem):
    """
    Load the subjectivity score lookup from a lexicon file.

    Every non-blank line is split on single spaces. The word is the value
    of the third ``key=value`` field and the polarity is the value of the
    last ``key=value`` field. A polarity of "negative" maps to -1,
    "positive" maps to 1 and anything else maps to 0.

    :params:
      filename (str) : path to file
      stem (bool) : stem word if true (forwarded to ``stem_word``)
    :return:
      scores (dict) : word-scores lookup; it also carries the
        "__dict_name__" -> "subj scores" entry
    """
    polarity_values = {"negative": -1, "positive": 1}
    scores = {"__dict_name__": "subj scores"}

    with open(filename, "r") as f:
        # Iterate the file lazily instead of materialising it via readlines().
        for line in f:
            # Skip blank lines, including whitespace-only ones, which would
            # otherwise fail on the field indexing below.
            if not line.strip():
                continue

            fields = line.split(" ")
            word = fields[2].split("=")[1]
            polarity = fields[-1].split("=")[1].strip()
            scores[stem_word(word, stem)] = polarity_values.get(polarity, 0)

    return scores

Example #2

0

Show file

File: tweets_feature_extractor.py Project: kstrauch94/HPVTweets

def get_bigram_sentiments(bigrams_path, stem):
    """
    Build the bigram sentiment lookup table.

    :params:
      bigrams_path (str) : path to tweet bigram sentiments
      stem (bool) : stem word if true (forwarded to ``stem_word``)

    :returns:
      bigram_sentiments (dict) : key = (word1, word2) tuple, value = score
        as a float; it also carries the
        "__dict_name__" -> "bigram sentiments" entry
    """
    lookup = {"__dict_name__": "bigram sentiments"}

    # The explicit encoding is required; without it this does not work on
    # Windows.
    with open(bigrams_path, encoding="utf-8") as handle:
        for row in handle:
            # Each row must hold exactly five whitespace-separated fields;
            # the last two are read but not used.
            first, second, raw_score, _pos, _neg = row.split()
            key = (stem_word(first, stem), stem_word(second, stem))
            lookup[key] = float(raw_score)

    return lookup

Example #3

0

Show file

File: tweets_feature_extractor.py Project: kstrauch94/HPVTweets

def get_pos_neg_words(pos_file, neg_file, stem):
    """
    Create the sets of positive and negative words.

    Lines starting with ';' are treated as comments and skipped, as are
    blank lines. Every remaining line is stripped and passed through
    ``stem_word``.

    :params:
      pos_file (str) : Opinion Lexicon. https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html
      neg_file (str) : Opinion Lexicon. https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html
      stem (bool) : stem word if true (forwarded to ``stem_word``)

    :return:
      (pos_vocab, neg_vocab) (tuple of sets) : words read from ``pos_file``
        and from ``neg_file``, in that order
    """

    def _load_vocab(path):
        # Read one lexicon file into a set of words; the context manager
        # makes sure the file handle is closed.
        with open(path) as infile:
            return {
                stem_word(entry.strip(), stem)
                for entry in infile
                if entry.strip() and not entry.startswith(';')
            }

    # Positive words come from pos_file and negative words from neg_file;
    # the previous version had the two sources crossed over.
    pos_vocab = _load_vocab(pos_file)
    neg_vocab = _load_vocab(neg_file)

    return pos_vocab, neg_vocab

Example #4

0

Show file

File: tweets_feature_extractor.py Project: kstrauch94/HPVTweets

def get_clusters(cluster_path, stem):
    """
    Build the word-to-cluster lookup table.

    Each line of the file must consist of exactly three tab-separated
    fields: the cluster, the word and a third field that is not used.

    :params:
      cluster_path (str) : path to tweet clusters
      stem (bool) : stem word if true (forwarded to ``stem_word``)
    :returns:
      clusters (dict) : key = word, value = cluster
    """
    lookup = {}

    # The explicit encoding is required; without it this does not work on
    # Windows.
    with open(cluster_path, encoding="utf-8") as handle:
        for row in handle:
            cluster_id, token, _unused = row.split('\t')
            lookup[stem_word(token, stem)] = cluster_id

    return lookup

Example #5

0

Show file

File: tweets_feature_extractor.py Project: kstrauch94/HPVTweets

def get_negation_list(stem):
    """
    Return the common negation words as a set. Kept in one place in case
    more words are added later or a 'negation lexicon' is found.

    :params:
      stem (bool) : stem word if true; when false the words are returned
        as written below
    :return:
      neg_set (set) : set of common negation words
    """
    negations = {
        'no', 'not', 'none', 'never', 'neither', 'nothing', 'nobody',
        'noone', 'nowhere', 'aint', 'isnt', 'arent', 'dont', 'doesnt',
        'didnt', 'hasnt', 'havent', 'hadnt', 'cant', 'couldnt', 'wont',
        'wouldnt', 'shouldnt',
    }

    # Without stemming the words are handed back untouched.
    if not stem:
        return negations

    return {stem_word(term, stem) for term in negations}