예제 #1
0
# Report what the fitted tokenizer collected, one labelled line per statistic:
# per-word counts, number of documents seen, per-word document frequency,
# and the index assigned to each word.
for caption, attribute in (
        ('단어 카운트:', 'word_counts'),
        ('문장 카운트:', 'document_count'),
        ('각 단어가 몇개의 문장에 포함되어 있는가 :', 'word_docs'),
        ('각 단어에 매겨진 인덱스 값 :', 'word_index'),
):
    # Look the attribute up lazily so each value is read right before it is printed.
    print(caption, getattr(token, attribute))

# Blank separator line before the next section's output.
print()
# Read short review texts and predict positive / negative sentiment.

docs = ['너무 재밌네요', '최고에요','참 잘만든 영화예요','추천하고 싶은 영화네요','한번 더 보고싶네요',
        '글쎄요','별로네요','생각보다 지루합니다','연기가 좋지않아요','재미없어요']

import numpy as np
# Labels aligned with `docs`: the first five reviews are positive (1),
# the last five are negative (0).
classes = np.array([1,1,1,1,1,0,0,0,0,0])

# Build the word -> index vocabulary from the review texts.
token = Tokenizer()
token.fit_on_texts(docs)
print(token.word_index)

# Vocabulary size for the Embedding layer. Tokenizer indices start at 1
# (0 is never assigned to a word), so the embedding table needs one extra row.
# Computed here, after fitting on `docs`, so it always matches this tokenizer.
word_size = len(token.word_index) + 1

model = Sequential()
# NOTE(review): input_length=4 assumes the sequences are padded to length 4
# before training -- confirm against the (not shown) padding step.
model.add(Embedding(word_size, 8, input_length=4))
#model.add(Flatten())
model.add(LSTM(32))
# Single sigmoid unit: binary (positive / negative) output.
model.add(Dense(1, activation='sigmoid'))

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit an extra "None" line).
model.summary()
model.compile(optimizer='adam', loss='binary_crossentropy')




예제 #2
0
def build_model(use_gpu: bool = False,
                num_units: int = 64,
                num_layers: int = 1,
                dropout_rate: float = 0.0,
                batch_size: int = 1000,
                window_size: int = 10,
                num_params: int = 0) -> tuple:
    """
    Builds the RNN-Model for character prediction together with its Tokenizer.

    The character vocabulary is collected from the ``name`` column of every
    dataframe yielded by ``Loader``; the special tokens ``pre``, ``<end>`` and
    ``pad`` are appended afterwards.

    The network consists of ``num_layers - 1`` sequence-returning LSTM layers,
    each followed by a Dropout layer, then one final LSTM layer and a softmax
    Dense layer over the vocabulary. With ``num_layers <= 1`` only the final
    LSTM is built and ``dropout_rate`` is not used.

    :param use_gpu: {bool} Uses CuDNNLSTM layers if True, otherwise plain LSTM layers
    :param num_units: {int} Number of LSTM-Units to use in each LSTM layer
    :param num_layers: {int} Number of LSTM layers to build
    :param dropout_rate: {float} Dropout rate applied between stacked LSTM layers
    :param batch_size: {int} Size of batch; capped at 5000 when passed to the Loader
    :param window_size: {int} Sequence size (first dimension of the input shape)
    :param num_params: {int} Number of control parameters appended to each input vector
    :return: Tuple ``(model, tokenizer)``: the compiled Keras model and the fitted Tokenizer
    """

    # Load max 5000 entries from the dataset to build the Tokenizer / vocabulary
    # NOTE(review): the meaning of Loader's second argument (0) is not visible
    # here -- presumably an offset; confirm against Loader.
    loader = Loader(min(batch_size, 5000), 0)
    tokenizer = Tokenizer(filters='', split='°', lower=False)

    for dataframe in loader:
        # Distinct characters occurring in this dataframe's names.
        chars = set()
        for name in dataframe['name']:
            chars.update(str(name))

        # Fit in sorted order: iterating a set of strings directly depends on
        # per-process hash randomization, which would make the indices of
        # equally frequent characters differ from run to run.
        tokenizer.fit_on_texts(sorted(chars))

    # Special tokens used in addition to the plain characters.
    tokenizer.fit_on_texts(['pre', '<end>', 'pad'])

    # Token indices start at 1, so one extra slot covers index 0.
    vocab_size = len(tokenizer.index_word) + 1
    # Each timestep carries a vocabulary-sized vector plus the control parameters.
    input_shape = (window_size, vocab_size + num_params)
    # Only the selected class name is evaluated, so the unused one need not exist.
    lstm_layer = CuDNNLSTM if use_gpu else LSTM

    # Build Keras Model
    model = Sequential()
    # range() is empty for num_layers <= 1, so no clamping is required.
    for _ in range(num_layers - 1):
        model.add(layer=lstm_layer(num_units,
                                   input_shape=input_shape,
                                   return_sequences=True))
        model.add(Dropout(dropout_rate))

    model.add(layer=lstm_layer(num_units, input_shape=input_shape))
    model.add(Dense(vocab_size, activation='softmax'))

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Show summary; summary() prints by itself and returns None, so wrapping
    # it in print() would only add a stray "None" line.
    model.summary()

    return model, tokenizer