コード例 #1
0
ファイル: naives.py プロジェクト: LoveToCommit/nlp-1
def all_words(lyrics):
    """Yield every lowercased content word from titled songs.

    Parameters:
        lyrics: iterable of dicts, each with a "lyrics" list of strings
            and (optionally) a "title".

    Yields:
        Each token, lowercased, whose song has a title, that is not a
        stopword, and whose unigram POS tag passes ``valid``.
    """
    for song in lyrics:
        # The title check does not depend on the token; hoisting it out
        # of the inner loop also skips tokenizing untitled songs entirely.
        if not song.get("title"):
            continue
        for word in nltk.word_tokenize(" ".join(song["lyrics"])):
            # valid() inspects only the POS tag of the single-token tagging.
            if not is_stopword(word) and valid(unigram_tagger.tag([word])[0][1]):
                yield word.lower()
コード例 #2
0
ファイル: tfidf.py プロジェクト: Big-Data/nlp
def add_info(lyric, text_col, i=0):
    """Return a copy of *lyric* enriched with tokens, terms and tf-idf.

    Parameters:
        lyric: dict with a "lyrics" list of strings.
        text_col: collection exposing ``tf_idf(term, text)``
            (e.g. nltk.TextCollection).
        i: progress counter, printed after incrementing.

    Returns:
        A new dict (the input is not mutated) with "lyrics" replaced by a
        tokenized, lowercased Text, plus "terms" (content-word set) and
        "tf_idf" (term -> score, built from the ascending-sorted pairs).
    """
    enriched = dict(lyric)
    # Lowercase every token of the utf-8 encoded, space-joined lyric lines.
    enriched["lyrics"] = Text(
        w.lower()
        for w in nltk.word_tokenize(" ".join(enriched["lyrics"]).encode("utf-8")))
    # Content words only: drop stopwords and tokens failing valid().
    enriched["terms"] = set(
        w for w in enriched["lyrics"] if not is_stopword(w) and valid(w))
    # Score each term against the whole collection, sorted by tf-idf.
    enriched["tf_idf"] = dict(
        sorted(((t, text_col.tf_idf(t, enriched["lyrics"])) for t in enriched["terms"]),
               key=lambda pair: pair[1]))
    i += 1
    # print() with a single argument behaves identically on Py2 and Py3;
    # the unused prep_word helper from the original was removed.
    print(i)
    return enriched
コード例 #3
0
def add_info(lyric, text_col, i=0):
    """Return a copy of *lyric* augmented with "lyrics" (tokenized Text),
    "terms" (content-word set) and "tf_idf" (term -> score).

    Parameters:
        lyric: dict carrying a "lyrics" list of strings.
        text_col: object exposing ``tf_idf(term, text)``.
        i: progress counter; printed after incrementing.

    Returns a new dict; *lyric* itself is left untouched.
    """
    result = dict(lyric)
    # Tokenize the utf-8 encoded, space-joined lyric lines, lowercased.
    result["lyrics"] = Text(
        tok.lower()
        for tok in nltk.word_tokenize(" ".join(result["lyrics"]).encode("utf-8")))
    # Keep content words only: no stopwords, must pass valid().
    result["terms"] = set(
        tok for tok in result["lyrics"] if not is_stopword(tok) and valid(tok))
    # Pair every term with its tf-idf against the collection, ascending.
    result["tf_idf"] = dict(
        sorted(((term, text_col.tf_idf(term, result["lyrics"]))
                for term in result["terms"]),
               key=lambda pair: pair[1]))
    i += 1
    # Single-argument print() runs on both Python 2 and 3; the unused
    # prep_word helper was removed.
    print(i)
    return result
コード例 #4
0
ファイル: tfidf.py プロジェクト: Big-Data/nlp
def add_info(lyric, text_col, i=0):
    """Enrich a lyric record with tokens, term set, and tf-idf scores.

    Parameters:
        lyric: dict with a "lyrics" list of strings.
        text_col: collection providing ``tf_idf(term, text)``.
        i: progress counter echoed to stdout after incrementing.

    Returns:
        A fresh dict copy of *lyric* where "lyrics" is a tokenized,
        lowercased Text, "terms" holds the content words, and "tf_idf"
        maps each term to its score (built from ascending-sorted pairs).
    """
    record = dict(lyric)
    # Join the lyric lines, encode to utf-8, tokenize, and lowercase.
    record["lyrics"] = Text(
        w.lower()
        for w in nltk.word_tokenize(" ".join(record["lyrics"]).encode("utf-8")))
    # Content words: everything that is neither a stopword nor invalid.
    record["terms"] = set(
        w for w in record["lyrics"] if not is_stopword(w) and valid(w))
    # tf-idf per term, sorted by score before building the dict.
    record["tf_idf"] = dict(
        sorted(((t, text_col.tf_idf(t, record["lyrics"])) for t in record["terms"]),
               key=lambda pair: pair[1]))
    i += 1
    # print() form is Py2/Py3 compatible; the dead prep_word helper is gone.
    print(i)
    return record
    
# Build one NLTK Text per identified song (content words only, lowercased)
# and wrap them in a TextCollection so tf-idf can be computed per song.
ts = TextCollection([Text((w.lower()
                           for w in nltk.word_tokenize(" ".join(l["lyrics"]).encode('utf-8'))
                           if not is_stopword(w) and valid(w)),
                          name=l["id"])
                     for l in lyrics if l.get("id")])

# One-off enrichment pass that produced the pickle loaded below:
#lyrics = map(lambda x : add_info(x[1], ts, x[0]), ((i, l) for i, l in enumerate(lyrics) if l.get("id")))

#with open("withtfidf.pickle", "w") as f:
#    pickle.dump(lyrics, f)

# Pickle files are binary: "rb" is required on Python 3 and harmless on
# Python 2 (the original opened in text mode "r").
# NOTE(review): pickle.load executes code embedded in the file -- only
# load pickles this project produced itself.
with open("withtfidf.pickle", "rb") as f:
    lyrics = pickle.load(f)

# Re-index the list of lyric dicts by song id for O(1) lookup.
lyrics = dict((l["id"], l) for l in lyrics)

def similar_lyrics(lyric):
    for l in lyrics.values():
コード例 #5
0
ファイル: naives.py プロジェクト: Big-Data/nlp
def all_words(lyrics):
    """Generate the lowercased content words of every titled song.

    For each dict in *lyrics*, its "lyrics" lines are joined and
    tokenized; a token is yielded (lowercased) only when the song has a
    title, the token is not a stopword, and its unigram POS tag passes
    ``valid``.
    """
    for song in lyrics:
        for token in nltk.word_tokenize(" ".join(song["lyrics"])):
            # Guard clauses preserve the original short-circuit order:
            # title first, then stopword, then the POS-tag check.
            if not song.get("title"):
                continue
            if is_stopword(token):
                continue
            if valid(unigram_tagger.tag([token])[0][1]):
                yield token.lower()
コード例 #6
0
        w.lower()
        for w in nltk.word_tokenize(" ".join(a["lyrics"]).encode("utf-8")))
    a["terms"] = set(w for w in a["lyrics"] if not is_stopword(w) and valid(w))

    a["tf_idf"] = dict(
        sorted(((t, text_col.tf_idf(t, a["lyrics"])) for t in a["terms"]),
               key=lambda x: x[1]))
    i += 1
    print i
    return a


# Assemble a TextCollection of per-song Texts (lowercased content words
# only) so tf-idf can later be computed for each song's terms.
ts = TextCollection([
    Text((w.lower()
          for w in nltk.word_tokenize(" ".join(l["lyrics"]).encode('utf-8'))
          if not is_stopword(w) and valid(w)),
         name=l["id"]) for l in lyrics if l.get("id")
])

# One-off enrichment pass that produced the pickle loaded below:
#lyrics = map(lambda x : add_info(x[1], ts, x[0]), ((i, l) for i, l in enumerate(lyrics) if l.get("id")))

#with open("withtfidf.pickle", "w") as f:
#    pickle.dump(lyrics, f)

# Pickle data is binary: open with "rb" (mandatory on Python 3, harmless
# on Python 2; the original used text mode "r").
# NOTE(review): pickle.load runs code from the file -- trusted input only.
with open("withtfidf.pickle", "rb") as f:
    lyrics = pickle.load(f)

# Re-key the lyric records by song id for constant-time lookup.
lyrics = dict((l["id"], l) for l in lyrics)


def similar_lyrics(lyric):