def tokenizer_from_json(json_string): """Parses a JSON tokenizer configuration file and returns a tokenizer instance. # Arguments json_string: JSON string encoding a tokenizer configuration. # Returns A Keras Tokenizer instance """ tokenizer_config = json.loads(json_string) config = tokenizer_config.get('config') word_counts = json.loads(config.pop('word_counts')) word_docs = json.loads(config.pop('word_docs')) index_docs = json.loads(config.pop('index_docs')) # Integer indexing gets converted to strings with json.dumps() index_docs = {int(k): v for k, v in index_docs.items()} index_word = json.loads(config.pop('index_word')) index_word = {int(k): v for k, v in index_word.items()} word_index = json.loads(config.pop('word_index')) tokenizer = Tokenizer(**config) tokenizer.word_counts = word_counts tokenizer.word_docs = word_docs tokenizer.index_docs = index_docs tokenizer.word_index = word_index tokenizer.index_word = index_word return tokenizer
def loadTokenzier(self, directory):
    with open(directory, encoding='UTF-8-sig') as fh:
        data = json.load(fh)

    tk = Tokenizer()
    key = list(data.keys())
    for i in key:
        setattr(tk, i, data[i])

    VOCAB_SIZE = len(tk.word_index) + 1
    self.START_TOKEN, self.END_TOKEN = [VOCAB_SIZE], [VOCAB_SIZE + 1]
    self.VOCAB_SIZE = VOCAB_SIZE + 2

    # When the tokenizer is loaded, every key and value comes back as a string.
    # Convert the keys of index_word back to int so that restoring tokens to
    # text works correctly later.
    tk.index_word = {int(k): v for k, v in tk.index_word.items()}
    return tk
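# A possible saving counterpart (not shown in the source): loadTokenzier expects a JSON
# file whose top-level keys are Tokenizer attribute names, so dumping tokenizer.__dict__
# matches that format, assuming every attribute value is JSON-serializable. The helper
# name save_tokenizer is hypothetical.
import json
from keras.preprocessing.text import Tokenizer

def save_tokenizer(tokenizer, directory):
    # write attribute-name -> value pairs, mirroring what loadTokenzier reads back
    with open(directory, 'w', encoding='UTF-8-sig') as fh:
        json.dump(tokenizer.__dict__, fh, ensure_ascii=False)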
def tokenizer_from_json(json_string):
    tokenizer_config = json.loads(json_string)
    config = tokenizer_config.get('config')

    word_counts = json.loads(config.pop('word_counts'))
    word_docs = json.loads(config.pop('word_docs'))
    index_docs = json.loads(config.pop('index_docs'))
    # Integer indexing gets converted to strings with json.dumps()
    index_docs = {int(k): v for k, v in index_docs.items()}
    index_word = json.loads(config.pop('index_word'))
    index_word = {int(k): v for k, v in index_word.items()}
    word_index = json.loads(config.pop('word_index'))

    tokenizer = Tokenizer(**config)
    tokenizer.word_counts = word_counts
    tokenizer.word_docs = word_docs
    tokenizer.index_docs = index_docs
    tokenizer.word_index = word_index
    tokenizer.index_word = index_word
    return tokenizer
def tokenizer_from_json(json_string):
    tokenizer_config = json.loads(json_string)
    config = tokenizer_config.get('config')

    word_counts = json.loads(config.pop('word_counts'))
    word_docs = json.loads(config.pop('word_docs'))
    index_docs = json.loads(config.pop('index_docs'))
    # Integer indexing gets converted to strings with json.dumps()
    index_docs = {int(k): v for k, v in index_docs.items()}
    index_word = json.loads(config.pop('index_word'))
    index_word = {int(k): v for k, v in index_word.items()}
    word_index = json.loads(config.pop('word_index'))

    tokenizer = Tokenizer(**config)
    tokenizer.word_counts = word_counts
    tokenizer.word_docs = word_docs
    tokenizer.index_docs = index_docs
    tokenizer.word_index = word_index
    tokenizer.index_word = index_word
    return tokenizer

def create_tf_example_row(input_row):
    # convert to string
    password = str(input_row[0])
    # create tf example
    tf_example = tf.train.Example(features=tf.train.Features(feature={
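# The feature dict in create_tf_example_row is cut off in the source. A minimal sketch
# of how such an Example is typically completed, assuming a single bytes feature named
# 'password'; the feature name and the serialized return value are assumptions, not
# taken from the original snippet.
import tensorflow as tf

def create_tf_example_row_sketch(input_row):
    password = str(input_row[0])
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'password': tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[password.encode('utf-8')])),
    }))
    # serialize so the record can be written to a TFRecord file
    return tf_example.SerializeToString()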