Python Tokenizer.word_counts 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: keras.preprocessing.text

클래스/타입: Tokenizer

메소드/함수: word_counts

hotexamples.com에서의 예제들: 4

Python Tokenizer.word_counts - 4개의 예제가 발견되었습니다. 다음은 오픈소스 프로젝트에서 추출한, Python의 keras.preprocessing.text.Tokenizer.word_counts 사용 예제 중 가장 높은 평가를 받은 실제 예제들입니다. 예제를 평가하면 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Tokenizer(30)

fit_on_texts(30)

texts_to_sequences(30)

sequences_to_texts(30)

sequences_to_matrix(30)

word_index(30)

fit_on_sequences(18)

num_words(16)

to_json(16)

filters(16)

texts_to_matrix(14)

texts_to_sequences_generator(11)

__init__(8)

text_to_sequences(7)

fit(6)

transform(6)

index_docs(5)

word_counts(4)

index_word(4)

word_docs(4)

get_feature_names(3)

text_to_matrix(3)

tabeled_tokens_to_matrix(1)

add_row(1)

texts_to_sequence(1)

vocabulary_size(1)

textsToSequences(1)

text_to_sequence(1)

encode(1)

create_tokenizer(1)

do_caps(1)

save(1)

extend(1)

process(1)

proc_text(1)

num_word(1)

num_tokens(1)

labelizeTweets(1)

get_index(1)

get_config(1)

fit_transform(1)

document_count(1)

fitToText(1)

py(1)

예제 #1

파일 보기

파일: filter.py 프로젝트: petartonchev/weast

def tokenizer_from_json(json_string):
    """Rebuild a Keras Tokenizer from its JSON-serialized configuration.

    # Arguments
        json_string: JSON string encoding a tokenizer configuration.
    # Returns
        A Keras Tokenizer instance
    """
    config = json.loads(json_string).get('config')

    def take(field):
        # Each vocabulary table sits in the config as its own JSON string,
        # so it is removed from the config and decoded a second time.
        return json.loads(config.pop(field))

    def int_keyed(table):
        # JSON object keys are always strings; turn them back into ints.
        return {int(key): value for key, value in table.items()}

    counts = take('word_counts')
    docs_per_word = take('word_docs')
    docs_per_index = int_keyed(take('index_docs'))
    words_by_index = int_keyed(take('index_word'))
    indices_by_word = take('word_index')

    # Whatever is left in the config is passed on as constructor arguments.
    tokenizer = Tokenizer(**config)
    tokenizer.word_counts = counts
    tokenizer.word_docs = docs_per_word
    tokenizer.index_docs = docs_per_index
    tokenizer.word_index = indices_by_word
    tokenizer.index_word = words_by_index

    return tokenizer

예제 #2

파일 보기

def load_tokenizer_from_file(filename):
    """Rebuild a Tokenizer from a JSON file holding saved tokenizer state.

    # Arguments
        filename: Path to a JSON file with the keys 'word_counts',
            'word_docs', 'word_index', 'document_count' and 'index_docs'.
    # Returns
        A Tokenizer instance whose vocabulary attributes are populated
        from the file.
    # Raises
        KeyError: If one of the required keys is missing from the file.
        ValueError: If the file is not valid JSON or an 'index_docs' key
            cannot be converted to an integer.
    """
    tokenizer = Tokenizer()

    # JSON text is UTF-8 by specification; do not depend on the locale's
    # default encoding, which would break on non-ASCII vocabulary.
    with open(filename, 'r', encoding='utf-8') as infile:
        tokenizer_data = json.load(infile)

    tokenizer.word_counts = OrderedDict(tokenizer_data['word_counts'])
    tokenizer.word_docs = tokenizer_data['word_docs']
    tokenizer.word_index = tokenizer_data['word_index']
    tokenizer.document_count = tokenizer_data['document_count']
    # Integer indexing gets converted to strings by JSON serialization, so
    # the keys must be turned back into ints for index-based lookups.
    tokenizer.index_docs = {
        int(index): count
        for index, count in dict(tokenizer_data['index_docs']).items()
    }
    # The file does not store the reverse mapping; derive it from
    # word_index so index -> word lookups work on the restored tokenizer.
    tokenizer.index_word = {
        index: word
        for word, index in dict(tokenizer_data['word_index']).items()
    }

    return tokenizer

예제 #3

파일 보기

def tokenizer_from_json(json_string):
    """Create a Tokenizer from a JSON-encoded tokenizer configuration.

    # Arguments
        json_string: JSON string encoding a tokenizer configuration.
    # Returns
        A Tokenizer instance carrying the decoded vocabulary tables.
    """
    config = json.loads(json_string).get('config')

    # (config key, whether its keys must be converted back to ints),
    # listed in the order the tables are decoded.
    decode_plan = (
        ('word_counts', False),
        ('word_docs', False),
        ('index_docs', True),
        ('index_word', True),
        ('word_index', False),
    )

    restored = {}
    for field, has_int_keys in decode_plan:
        # Every table is itself a JSON string nested inside the config.
        table = json.loads(config.pop(field))
        if has_int_keys:
            # Integer indexing gets converted to strings with json.dumps()
            table = {int(key): value for key, value in table.items()}
        restored[field] = table

    # The remaining config entries are the constructor arguments.
    tokenizer = Tokenizer(**config)
    for attribute in ('word_counts', 'word_docs', 'index_docs',
                      'word_index', 'index_word'):
        setattr(tokenizer, attribute, restored[attribute])

    return tokenizer

예제 #4

파일 보기

파일: utils.py 프로젝트: zemlatruskavets/lstm_cracker


    tokenizer_config = json.loads(json_string)
    config           = tokenizer_config.get('config')

    word_counts = json.loads(config.pop('word_counts'))
    word_docs   = json.loads(config.pop('word_docs'))
    index_docs  = json.loads(config.pop('index_docs'))
    
    # Integer indexing gets converted to strings with json.dumps()
    index_docs = {int(k): v for k, v in index_docs.items()}
    index_word = json.loads(config.pop('index_word'))
    index_word = {int(k): v for k, v in index_word.items()}
    word_index = json.loads(config.pop('word_index'))

    tokenizer             = Tokenizer(**config)
    tokenizer.word_counts = word_counts
    tokenizer.word_docs   = word_docs
    tokenizer.index_docs  = index_docs
    tokenizer.word_index  = word_index
    tokenizer.index_word  = index_word


    return tokenizer





def create_tf_example_row(input_row):

    # convert to string