Code Example #1
import os

def test4():
    # Load two pre-filtered token files; each file stores comma-separated tokens.
    text = TextExtractor.read_all_text(os.path.join(constants.DATA_PATH, 'author_topic_filtered', '1002.txt')).split(',')
    text2 = TextExtractor.read_all_text(os.path.join(constants.DATA_PATH, 'author_topic_filtered', '1004.txt')).split(',')
    docs = [text, text2]
    tf_idf, corpus, dictionary = train_tf_idf(docs)
    plot_word_cloud(tf_idf, corpus, dictionary, 'filtered_100_wordcloud')
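The example above calls a train_tf_idf helper whose source is not shown. The sketch below is a minimal reconstruction of what it likely does, assuming the project uses gensim; the function name and the (tf_idf, corpus, dictionary) return values match the call above, but the internals are an assumption, not the project's actual implementation.

from gensim import corpora, models

def train_tf_idf(docs):
    # Build a token -> integer-id mapping over all documents (assumed helper).
    dictionary = corpora.Dictionary(docs)
    # Represent each document as a bag-of-words vector of (token_id, count) pairs.
    corpus = [dictionary.doc2bow(doc) for doc in docs]
    # Fit TF-IDF weights over the bag-of-words corpus.
    tf_idf = models.TfidfModel(corpus)
    return tf_idf, corpus, dictionary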
Code Example #2
import os

def test():
    # Load three token files produced by the extraction step; tokens are comma-separated.
    doc1 = TextExtractor.read_all_text(os.path.join(constants.TEST_OUTPUT, "10.txt")).split(",")
    doc2 = TextExtractor.read_all_text(os.path.join(constants.TEST_OUTPUT, "100.txt")).split(",")
    doc3 = TextExtractor.read_all_text(os.path.join(constants.TEST_OUTPUT, "101.txt")).split(",")

    docs = [doc1, doc2, doc3]
    tf_idf, corpus, dictionary = train_tf_idf(docs)
    plot_word_cloud(tf_idf, corpus, dictionary)
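plot_word_cloud is likewise not shown. Below is a minimal sketch under the assumption that it sums each term's TF-IDF weight across the corpus and renders the result with the wordcloud package; the optional file-name argument mirrors how the examples call it (with and without a name), but the aggregation strategy is an assumption.

import matplotlib.pyplot as plt
from wordcloud import WordCloud

def plot_word_cloud(tf_idf, corpus, dictionary, file_name=None):
    # Sum each term's TF-IDF weight across every document in the corpus (assumed).
    weights = {}
    for doc in tf_idf[corpus]:
        for term_id, weight in doc:
            word = dictionary[term_id]
            weights[word] = weights.get(word, 0.0) + weight
    # Render the aggregated weights as a word cloud.
    cloud = WordCloud(background_color='white').generate_from_frequencies(weights)
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    # Save to disk when a file name is given, otherwise display interactively.
    if file_name:
        plt.savefig(file_name + '.png')
    else:
        plt.show()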
Code Example #3
import os

def test2():
    # Build a corpus from up to 601 filtered token files.
    docs = []
    dir_path = os.path.join(constants.DATA_PATH, 'author_topic_filtered')
    for count, file_name in enumerate(os.listdir(dir_path)):
        doc = TextExtractor.read_all_text(os.path.join(dir_path, file_name)).split(",")
        docs.append(doc)
        # Stop after file index 600; the original `if count >= 0` guard was
        # always true and has been dropped.
        if count >= 600:
            break
        print(count)
    tf_idf, corpus, dictionary = train_tf_idf(docs)
    plot_word_cloud(tf_idf, corpus, dictionary, 'filtered_wordcloud')
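Every example loads its input through TextExtractor.read_all_text, a project helper whose source is not included. A plausible shape, assuming it simply reads a whole UTF-8 file into one string; both the class layout and the encoding are assumptions.

class TextExtractor(object):
    @staticmethod
    def read_all_text(path):
        # Read the entire file as UTF-8 and return its contents as one string.
        with open(path, encoding='utf-8') as f:
            return f.read()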
Code Example #4
import os

def test5():
    # Earlier pipeline stages, kept for reference; uncomment a block to
    # visualise that step of the token-cleaning pipeline.
    # text1 = TextExtractor.read_all_text(os.path.join(constants.DATA_PATH, 'authors_topic', '1002.txt'))
    # tokens = Tokenizer.get_tokens(text1)
    # create_word_cloud_from_words(tokens, "Initial Tokens")
    #
    # tokens = Tokenizer.get_stopped_tokens(text1)
    # create_word_cloud_from_words(tokens, "Stop Words Removed")
    #
    # tokens = Tokenizer.get_stemmed_tokens(text1)
    # create_word_cloud_from_words(tokens, "Stemmed Tokens")
    #
    # tokens = TokenFixer.fix_broken_words(tokens)
    # nps = NLPParser.get_noun_phrases(text1)
    # create_word_cloud_from_words(nps, "Noun Phrases")

    # Plot only the final filtered topic tokens.
    tokens0 = TextExtractor.read_all_text(os.path.join(constants.DATA_PATH, 'author_topic_filtered', '1002.txt')).split(',')
    create_word_cloud_from_words(tokens0, "Topics")
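create_word_cloud_from_words, used throughout test5, is also not shown. A minimal sketch assuming the wordcloud and matplotlib packages: unlike plot_word_cloud above, it works from raw token frequencies (no TF-IDF weighting) and displays the cloud under the given title.

import matplotlib.pyplot as plt
from wordcloud import WordCloud

def create_word_cloud_from_words(words, title):
    # Join the token list into one string and let WordCloud count frequencies.
    cloud = WordCloud(background_color='white').generate(' '.join(words))
    plt.imshow(cloud, interpolation='bilinear')
    plt.title(title)
    plt.axis('off')
    plt.show()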