Code example #1
    def process(self, filename_read, filename_save, header_string, footer_string, sentence_max_len = 15):
        """Read a raw text file, clean and tokenize it, build a Vocabulary,
        and write the padded token stream to *filename_save*.

        Parameters
        ----------
        filename_read : str
            Path of the raw input file.
        filename_save : str
            Path the processed, PAD-aligned token stream is written to.
        header_string : str
            Stored on the instance (presumably consumed by ``dropheader``
            — TODO confirm).
        footer_string : str
            Marker; processing stops at the first row containing it.
        sentence_max_len : int, optional
            Soft cap: a "conditional" sentence end only terminates the
            sentence once it is longer than this many words.
        """
        self.header_string = header_string

        rows = self.readfile(filename_read)
        rows = self.dropheader(rows)

        # Concatenate all rows (lower-cased, with self.translation applied)
        # into a single one-element string array.
        # FIX: np.str was removed in NumPy 1.24 — the equivalent dtype is
        # the builtin str.
        result = np.asarray([' '], dtype=str)
        for row in rows:
            # Stop at the footer marker.
            if row.find(footer_string) != -1:
                break
            row = row.lower().translate(self.translation)
            result = np.char.add(result, np.asarray(row, dtype=str))

        # Split the concatenated text into words and drop stop words.
        self.text = np.char.split(result).tolist()[0]
        self.text = [w for w in self.text if not self.is_stopword(w)]

        # Replace each word with its numeric form.  The length guard is kept
        # in case convert2number shrinks self.text — TODO confirm whether it
        # mutates the list.
        for x in range(len(self.text)):
            if x >= len(self.text):
                break
            self.text[x] = self.convert2number(x, 0)

        result = np.asarray(self.text)

        # Lemmatize via the stem_words mapping (falling back to the word
        # itself) and expand contractions, re-splitting into tokens.
        text_lem = np.asarray([' '], dtype=str)
        for item in result:
            try:
                y = self.stem_words[item]
            except KeyError:  # no stem known for this word — keep it as-is
                y = item
            y = np.char.split(self.expand_contractions(y))
            text_lem = np.append(text_lem, y.tolist())

        voc = Vocabulary('vocabulary')

        sentence = []
        text_out = [voc.to_token(self.SOS_token)]
        sen_length = 0

        for word in text_lem:
            if word == " ":
                continue
            word_cleaned = word.translate(self.trans_punctuation)
            sentence.append(word_cleaned)
            sen_length += 1
            # End the sentence on a hard terminator, or on a conditional one
            # once the sentence has grown past sentence_max_len.
            if self.is_sentence_end(word) \
                    or (self.is_sentence_conditional_end(word, word_cleaned) and sen_length > sentence_max_len):
                if word_cleaned != "and":
                    text_out.append(word_cleaned)

                sentence_str = ' '.join(sentence).replace(" and ","")
                voc.add_chunk(sentence_str)
                text_out.append(voc.to_token(self.EOS_token))
                text_out.append(voc.to_token(self.SOS_token))
                sentence.clear()
                sen_length = 0
            elif word_cleaned != "and":  # "and" tokens are dropped everywhere
                text_out.append(word_cleaned)

        # The trailing SOS token (appended after the final EOS) becomes the
        # file's newline terminator.
        text_out[-1] = "\n"

        # NOTE(review): save_data writes to the same path that is reopened
        # with "w" (truncating) just below — confirm this is intended.
        voc.save_data(filename_save)

        # Write the token stream; before each EOS marker, pad the sentence
        # with PAD tokens up to the longest sentence length.
        # FIX: use a context manager so the handle is closed even on error.
        x = 0
        with open(filename_save, "w") as file_proc:
            for token in text_out:
                if token == voc.to_token(self.EOS_token):
                    for _ in range(voc.count_longest_sentence() - x):
                        file_proc.write(voc.to_token(self.PAD_token) + " ")
                    x = 0
                file_proc.write(token + " ")
                x += 1