import os

from common.optimizer import RMSprop
from common.trainer import Trainer
from common.util import eval_transformer
from preprocess import preprocessing
from model.transformer import Transformer

# set time_size
time_size = 35

# reuse the cached preprocessing result if it exists
if os.path.isfile("../pkl/myTransformer_preprocess.pkl"):
    x_train, t_train = preprocessing.load_preprocess(
        '../pkl/myTransformer_preprocess.pkl')
else:
    x_train, t_train = preprocessing.load_data(
        file_name="../dataset/ChatbotData.csv",
        need_soseos=True,
        save_file_name='../pkl/myTransformer_preprocess.pkl',
        time_size=time_size,
        padding_num=-1)

# hyperparameters
vocab_size = len(preprocessing.id_to_word)
wordvec_size = 300
head_size = 8
batch_size = 128
max_epoch = 20
max_grad = 5.0

# split off a test set
x_test, x_train = preprocessing.divide_test_train(x_train, test_rate=0.1)
t_test, t_train = preprocessing.divide_test_train(t_train, test_rate=0.1)

# NOTE: the constructor arguments after vocab_size are assumptions
# based on the hyperparameters defined above
model = Transformer(vocab_size, wordvec_size, head_size)
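# The script stops at model construction. Below is a minimal sketch of how
# the imported RMSprop/Trainer/eval_transformer pieces would typically be
# wired together; the Trainer.fit signature follows the common/ package this
# repo builds on, and the eval_transformer argument list is an assumption.
optimizer = RMSprop()
trainer = Trainer(model, optimizer)

for epoch in range(max_epoch):
    trainer.fit(x_train, t_train, max_epoch=1,
                batch_size=batch_size, max_grad=max_grad)

    # spot-check the held-out pairs after each epoch
    correct_num = 0
    for i in range(len(x_test)):
        question, correct = x_test[[i]], t_test[[i]]
        correct_num += eval_transformer(model, question, correct,
                                        preprocessing.id_to_word)
    print('val acc %.3f%%' % (correct_num / len(x_test) * 100))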
import sys
sys.path.append('..')
from common.np import *
from model.rnnlmgen import BetterRnnlmGen
from preprocess import preprocessing

preprocessing.load_data('../dataset/ChatbotData.csv', scaled_size=False)

vocab_size = len(preprocessing.word_to_id)
corpus_size = len(preprocessing.corpus)
wordvec_size = 300
hidden_size = 300

model = BetterRnnlmGen(vocab_size=vocab_size,
                       wordvec_size=wordvec_size,
                       hidden_size=hidden_size)
model.load_params('./pkl/myLstm.pkl')

# seed phrase: "오늘 너무 힘들어" ("I'm having such a hard time today")
start_words = '오늘 너무 힘들어'
start_ids = [preprocessing.word_to_id[w] for w in start_words.split(' ')]

# feed all but the last seed word to prime the LSTM hidden state
for x in start_ids[:-1]:
    x = np.array(x).reshape(1, 1)
    model.predict(x)

# generate from the last seed word
word_ids = model.generate(start_ids[-1], sample_size=7)
# word_ids = start_ids[:-1] + word_ids
txt = ' '.join([preprocessing.id_to_word[i] for i in word_ids])
print(txt)
from preprocess import preprocessing
from preprocess import integration
from common.optimizer import Adam
from common.trainer import Trainer
from common.util import eval_seq2seq
from model.attention_seq2seq import AttentionSeq2seq
from model.seq2seq import Seq2seq
from model.peeky_seq2seq import PeekySeq2seq
from common import config
config.GPU = True

# load the data
x_train, t_train = preprocessing.load_data(
    file_name='./dataset/ChatbotData.csv',
    need_soseos=True,
    save_file_name='./pkl/qadf2.pkl')

# to save time, load the cached preprocessing result instead
# x_train, t_train = preprocessing.load_preprocess('../pkl/qadf.pkl')
# x_train, t_train, model = integration.sum_att_models(
#     './pkl/myAttentionSeq2seq.pkl', './pkl/qadf.pkl',
#     './pkl/myAttentionSeq2seq2.pkl', './pkl/qadf2.pkl')

# split off a test set
x_test, x_train = preprocessing.divide_test_train(x_train, test_rate=0.1)
t_test, t_train = preprocessing.divide_test_train(t_train, test_rate=0.1)

# hyperparameters
wordvec_size = 300  # default
hidden_size = 300   # default
batch_size = 300    # default
vocab_size = len(preprocessing.id_to_word)
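# The script stops after the hyperparameters. A minimal sketch of the usual
# training loop for this setup follows, in the Trainer/eval_seq2seq style the
# common/ package provides; the epoch count, acc_list bookkeeping, and save
# path are assumptions, not the project's actual values.
model = AttentionSeq2seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

acc_list = []
for epoch in range(10):
    trainer.fit(x_train, t_train, max_epoch=1, batch_size=batch_size)

    # measure exact-match accuracy on the held-out pairs
    correct_num = 0
    for i in range(len(x_test)):
        question, correct = x_test[[i]], t_test[[i]]
        correct_num += eval_seq2seq(model, question, correct,
                                    preprocessing.id_to_word, verbos=False)
    acc = float(correct_num) / len(x_test)
    acc_list.append(acc)
    print('val acc %.3f%%' % (acc * 100))

model.save_params('./pkl/myAttentionSeq2seq.pkl')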
# module-level imports assumed by this method; AttentionLayer is the
# third-party Bahdanau attention layer for Keras (thushv89/attention_keras)
import os

from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate
from tensorflow.keras.models import Model, load_model, save_model
from tensorflow.keras.callbacks import EarlyStopping

from attention import AttentionLayer
from preprocess import preprocessing


def learn(self, hidden_size=300, embedding_dim=300, epoch=10,
          batch_size=128, use_loaded=False, log=None):
    if log is not None:
        log.addItem('configuring file...')

    # load data
    if os.path.isfile("./qadf_tf.pkl") and use_loaded:
        self.x_train, self.t_train = preprocessing.load_preprocess(
            './qadf_tf.pkl')
    else:
        self.x_train, self.t_train = preprocessing.load_data(
            file_name='../dataset/ChatbotData.csv',
            need_soseos=True,
            padding_num=0,
            save_file_name='qadf_tf.pkl')

    # divide data
    self.x_test, self.x_train = preprocessing.divide_test_train(
        self.x_train, test_rate=0.1)
    self.t_test, self.t_train = preprocessing.divide_test_train(
        self.t_train, test_rate=0.1)
    self.max_len = self.x_train.shape[1]

    # train model
    model = None
    if os.path.isdir('./my_model') and use_loaded:
        model = load_model('./my_model', compile=False)
        if log is not None:
            log.addItem('model loaded...')
        else:
            print('model loaded...')
    else:
        if log is not None:
            log.addItem('creating model...')
        else:
            print('creating model...')

        # encoder
        encoder_inputs = Input(shape=(self.x_train.shape[1], ))

        # encoder embedding
        encoder_embed = Embedding(len(preprocessing.word_to_id),
                                  embedding_dim)(encoder_inputs)

        # encoder: three stacked LSTM layers
        encoder_lstm1 = LSTM(hidden_size, return_sequences=True,
                             return_state=True, dropout=0.4)
        encoder_output1, state_h1, state_c1 = encoder_lstm1(encoder_embed)

        encoder_lstm2 = LSTM(hidden_size, return_sequences=True,
                             return_state=True, dropout=0.4)
        encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)

        encoder_lstm3 = LSTM(hidden_size, return_sequences=True,
                             return_state=True, dropout=0.4)
        encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

        # decoder
        decoder_inputs = Input(shape=(None, ))

        # decoder embedding
        decoder_embed_layer = Embedding(len(preprocessing.word_to_id),
                                        embedding_dim)
        decoder_embed = decoder_embed_layer(decoder_inputs)

        # decoder LSTM, initialized with the encoder's final states
        decoder_lstm = LSTM(hidden_size, return_sequences=True,
                            return_state=True, dropout=0.4)
        decoder_outputs, _, _ = decoder_lstm(
            decoder_embed, initial_state=[state_h, state_c])

        # pre-attention softmax variant, kept commented out
        # decoder_softmax = Dense(len(preprocessing.word_to_id),
        #                         activation='softmax')
        # decoder_softmax_outputs = decoder_softmax(decoder_outputs)

        # add attention over the encoder outputs
        attention_layer = AttentionLayer(name='attention_layer')
        attention_outputs, attention_states = attention_layer(
            [encoder_outputs, decoder_outputs])

        # connect attention results and hidden states of decoder
        decoder_concat_inputs = Concatenate(axis=-1, name='concat_layer')(
            [decoder_outputs, attention_outputs])

        # softmax over the vocabulary
        decoder_softmax = Dense(len(preprocessing.word_to_id),
                                activation='softmax')
        decoder_softmax_outputs = decoder_softmax(decoder_concat_inputs)

        # define model
        model = Model([encoder_inputs, decoder_inputs],
                      decoder_softmax_outputs)
        save_model(model, './my_model')

    model.summary()

    if os.path.isfile('mycheckpoint.index') and use_loaded:
        model.load_weights('mycheckpoint')
        if log is not None:
            log.addItem("checkpoint loaded...")
        else:
            print("checkpoint loaded...")

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

    # fit
    if epoch > 0:
        # early stopping; patience is an integer number of epochs to wait
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                           patience=1)
        # decoder input is the target shifted right; the label is the
        # target shifted left (teacher forcing)
        history = model.fit(
            [self.x_train, self.t_train[:, :-1]],
            self.t_train.reshape(self.t_train.shape[0],
                                 self.t_train.shape[1], 1)[:, 1:],
            epochs=epoch,
            callbacks=[es],
            batch_size=batch_size,
            validation_data=([self.x_test, self.t_test[:, :-1]],
                             self.t_test.reshape(self.t_test.shape[0],
                                                 self.t_test.shape[1],
                                                 1)[:, 1:]))
        model.save_weights('mycheckpoint')

        # if you need visualization
        # plt.plot(history.history['loss'], label='train_loss')
        # plt.plot(history.history['val_loss'], label='val_loss')
        # plt.legend()
        # plt.show()

    # test model
    if os.path.isdir('./test_encoder') and os.path.isdir(
            './test_decoder') and use_loaded:
        self.encoder_model = load_model('./test_encoder', compile=True)
        self.decoder_model = load_model('./test_decoder', compile=True)
    else:
        # make test (inference-time) encoder model
        self.encoder_model = Model(
            inputs=encoder_inputs,
            outputs=[encoder_outputs, state_h, state_c])

        # inference-time decoder: previous states come in as inputs
        decoder_state_input_h = Input(shape=(hidden_size, ))
        decoder_state_input_c = Input(shape=(hidden_size, ))

        decoder_embed2 = decoder_embed_layer(decoder_inputs)
        decoder_outputs2, state_h2, state_c2 = decoder_lstm(
            decoder_embed2,
            initial_state=[decoder_state_input_h, decoder_state_input_c])

        # attention over the precomputed encoder outputs
        decoder_hidden_state_input = Input(
            shape=(self.x_train.shape[1], hidden_size))
        attention_out_inf, attention_states_inf = attention_layer(
            [decoder_hidden_state_input, decoder_outputs2])
        decoder_inf_concat = Concatenate(axis=-1, name='concat')(
            [decoder_outputs2, attention_out_inf])

        # decoder output layer
        decoder_outputs2 = decoder_softmax(decoder_inf_concat)

        # decoder model
        self.decoder_model = Model(
            [decoder_inputs] + [decoder_hidden_state_input,
                                decoder_state_input_h,
                                decoder_state_input_c],
            [decoder_outputs2] + [state_h2, state_c2])

        self.encoder_model.save('test_encoder')
        print('test_encoder saved')
        self.decoder_model.save('test_decoder')
        print('test_decoder saved')

    if log is not None:
        log.addItem("finished...")
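# The two inference models above are built but never exercised in this file.
# A minimal greedy-decoding sketch follows; it assumes `import numpy as np`
# at module level, and the `decode_sequence` name and the '<sos>'/'<eos>'
# token strings are assumptions, not confirmed by the project.
def decode_sequence(self, input_seq):
    # run the encoder once, then feed the decoder one token at a time
    enc_out, h, c = self.encoder_model.predict(input_seq)

    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = preprocessing.word_to_id['<sos>']  # assumed token

    decoded = []
    for _ in range(self.max_len):
        output, h, c = self.decoder_model.predict(
            [target_seq, enc_out, h, c])
        sampled_id = int(np.argmax(output[0, -1, :]))
        word = preprocessing.id_to_word[sampled_id]
        if word == '<eos>':  # assumed end token
            break
        decoded.append(word)
        target_seq[0, 0] = sampled_id  # feed the prediction back in

    return ' '.join(decoded)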