print('Word counts:', token.word_counts)
print('Document count:', token.document_count)
print('How many documents contain each word:', token.word_docs)
print('Index assigned to each word:', token.word_index)
print()

# Read the texts and predict positive/negative sentiment
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Ten short Korean movie reviews: the first five are positive, the last five negative
docs = ['너무 재밌네요', '최고에요', '참 잘만든 영화예요', '추천하고 싶은 영화네요',
        '한번 더 보고싶네요', '글쎄요', '별로네요', '생각보다 지루합니다',
        '연기가 좋지않아요', '재미없어요']
classes = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])  # 1 = positive, 0 = negative

token = Tokenizer()
token.fit_on_texts(docs)
print(token.word_index)

# Turn each review into an integer sequence and pad every sequence to length 4
x = token.texts_to_sequences(docs)
padded_x = pad_sequences(x, maxlen=4)
word_size = len(token.word_index) + 1  # +1 because index 0 is reserved for padding

model = Sequential()
model.add(Embedding(word_size, 8, input_length=4))
# model.add(Flatten())
model.add(LSTM(32))
model.add(Dense(1, activation='sigmoid'))
print(model.summary())
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(padded_x, classes, epochs=20)
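# A minimal prediction sketch (an assumption, not part of the original snippet):
# once the model above has been fit, the same tokenizer can score a new review.
# texts_to_sequences silently drops words it never saw, so the sample below
# reuses vocabulary from the training reviews ('최고에요' = "it's the best").
sample = pad_sequences(token.texts_to_sequences(['최고에요']), maxlen=4)
print(model.predict(sample))  # close to 1.0 -> positive, close to 0.0 -> negative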
from keras.models import Sequential
from keras.layers import CuDNNLSTM, Dense, Dropout, LSTM
from keras.preprocessing.text import Tokenizer
# Loader is the project's own dataset loader; its import path is assumed here.
from loader import Loader


def build_model(use_gpu: bool = False, num_units: int = 64, num_layers: int = 1,
                dropout_rate: float = 0.0, batch_size: int = 1000,
                window_size: int = 10, num_params: int = 0):
    """
    Builds the RNN model for character prediction.

    :param window_size: {int} Sequence (window) size
    :param batch_size: {int} Size of a batch
    :param dropout_rate: {float} Dropout rate applied between LSTM layers
    :param num_layers: {int} Number of LSTM layers to build
    :param num_units: {int} Number of LSTM units per layer
    :param use_gpu: {bool} Uses TensorFlow GPU support (CuDNNLSTM) if True, otherwise trains on the CPU
    :param num_params: {int} Number of control parameters appended to each input vector
    :return: Keras model and the fitted tokenizer
    """
    # Load at most 5000 entries from the dataset to build the tokenizer / vocabulary
    loader = Loader(min(batch_size, 5000), 0)
    tokenizer = Tokenizer(filters='', split='°', lower=False)
    # Fit the tokenizer on every character that occurs in the names
    for dataframe in loader:
        chars = set()
        for name in dataframe['name']:
            chars.update(set(str(name)))
        tokenizer.fit_on_texts(list(chars))
    # Special tokens: 'pre' (prefix), '<end>' (end of name), 'pad' (padding)
    tokenizer.fit_on_texts(['pre', '<end>', 'pad'])

    # Input features per timestep: vocabulary (+1 for the reserved index 0) plus control parameters
    feature_size = len(tokenizer.index_word) + 1 + num_params
    rnn_layer = CuDNNLSTM if use_gpu else LSTM

    # Build the Keras model: all but the last LSTM layer return sequences so they can be stacked
    model = Sequential()
    for _ in range(max(num_layers - 1, 0)):
        model.add(rnn_layer(num_units, input_shape=(window_size, feature_size), return_sequences=True))
        model.add(Dropout(dropout_rate))
    model.add(rnn_layer(num_units, input_shape=(window_size, feature_size)))
    model.add(Dense(len(tokenizer.index_word) + 1, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Show summary
    print(model.summary())
    return model, tokenizer
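# A hypothetical usage sketch (the helper 'encode_window' and the seed string are
# illustrative, not part of the original module; it assumes the project's Loader
# and dataset are available). It one-hot encodes a window of characters in the
# model's expected input shape and asks for the most likely next character.
import numpy as np

model, tokenizer = build_model(use_gpu=False, num_units=64, num_layers=2, dropout_rate=0.2)

def encode_window(chars, tokenizer, window_size=10, num_params=0):
    # One-hot encode the last `window_size` characters into shape (1, window_size, features)
    feature_size = len(tokenizer.index_word) + 1 + num_params
    x = np.zeros((1, window_size, feature_size))
    for t, ch in enumerate(chars[-window_size:]):
        index = tokenizer.word_index.get(ch)
        if index is not None:
            x[0, t, index] = 1.0
    return x

# Untrained weights give an arbitrary distribution; after training this yields a plausible next character.
probs = model.predict(encode_window(list('Anna'), tokenizer))[0]
print(tokenizer.index_word.get(int(np.argmax(probs)), 'pad'))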