Example #1
import numpy as np
from nltk.tokenize import word_tokenize

# `stem` (a stemmer instance) and `stop` (a stopword collection) are assumed
# to be defined at module level elsewhere in the project.

def get_similitude_projet_activite(projet, activite):
    """
    projet : the mentee's (filleul) stated motivation
    activite : the mentor's (parrain) activity
    -----------
    Stem every word in both descriptions.
    Return 1 if at least one stemmed word from `activite` also appears
    in `projet`, else 0.

    If `projet` or `activite` is NaN, return 0.
    """
    # pandas exposes missing text fields as the np.nan singleton.
    if projet is np.nan or activite is np.nan:
        return 0
    projet_stem = [
        stem.stem(word) for word in word_tokenize(projet) if word not in stop
    ]
    activite_stem = [
        stem.stem(word) for word in word_tokenize(activite) if word not in stop
    ]

    # Any shared stem counts as a match.
    return 1 if set(activite_stem).intersection(projet_stem) else 0
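
As a quick smoke test, here is a plausible module-level setup and a sample call. The French Snowball stemmer and stopword list are assumptions (the original never shows how `stem` and `stop` are built), chosen because the docstring fields are French:

import numpy as np
from nltk.corpus import stopwords
from nltk.stem.snowball import FrenchStemmer

stop = set(stopwords.words('french'))  # assumed stopword collection
stem = FrenchStemmer()                 # assumed stemmer instance

print(get_similitude_projet_activite("ouvrir une boulangerie", "boulangerie"))  # 1: shared stem
print(get_similitude_projet_activite(np.nan, "boulangerie"))                    # 0: missing value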
Example #2
from nltk.stem import SnowballStemmer, WordNetLemmatizer

def lemmatize_stemming(text):
    # Lemmatize as a verb first, then stem the resulting lemma.
    stemmer = SnowballStemmer("english", ignore_stopwords=True)
    return stemmer.stem(WordNetLemmatizer().lemmatize(text, pos='v'))
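
A quick illustration of the two-step reduction: the verb lemma of "running" is "run", which the Snowball stemmer then leaves unchanged:

print(lemmatize_stemming("running"))  # -> 'run'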
Example #3
from nltk.stem import SnowballStemmer, WordNetLemmatizer

stemmer = SnowballStemmer("english")  # assumed module-level stemmer; not shown in the original

def lemmatize_stemming(text):
    return stemmer.stem(WordNetLemmatizer().lemmatize(text, pos='v'))
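
Compared with the previous example, this variant keeps the stemmer at module level, so it is not rebuilt on every call; the per-call WordNetLemmatizer() instantiation could be hoisted the same way when preprocessing large batches.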
Example #4
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

stopword_list = set(stopwords.words('english'))
ps = PorterStemmer()

def generate_tfidf(text_corpora):
    # Fit a TF-IDF vectorizer on the corpus and return the document-term matrix.
    vectorizer = TfidfVectorizer(lowercase=False)
    vectorizer.fit(text_corpora)
    return vectorizer.transform(text_corpora)

r_df = pd.read_csv("python4.csv", encoding="ISO-8859-1")
print(r_df)

# Strip digits, lowercase, tokenize, then stem and drop stopwords.
text_corpora = [s.translate(str.maketrans("", "", "0123456789")) for s in r_df.loc[:, "scraptweets"]]
words_data = [word_tokenize(s.lower()) for s in text_corpora]
words_data = [[ps.stem(word) for word in sent if word not in stopword_list] for sent in words_data]
sent_data = [" ".join(sent) for sent in words_data]

vector = generate_tfidf(sent_data)

# Cluster the TF-IDF vectors into five groups.
kmeans_obj = KMeans(n_clusters=5, max_iter=100)
clusters = kmeans_obj.fit(vector)

r_df["label"] = clusters.labels_
print("cluster 1")
print(r_df.loc[r_df["label"] == 0])
print(r_df.loc[r_df["label"] == 1])
r_df.to_csv("Clustered_tweet2.txt", index=False)

file = open('Clustered_tweet1.txt', encoding="utf8")
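
The choice of n_clusters=5 above is not justified anywhere in the snippet. A minimal sketch for sanity-checking k with silhouette scores on the same TF-IDF matrix (the range 2 to 8 is an arbitrary illustration):

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

for k in range(2, 9):
    labels = KMeans(n_clusters=k, max_iter=100, n_init=10).fit_predict(vector)
    print(k, round(silhouette_score(vector, labels), 3))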