train = traindata
test = testdata
# split into input and outputs
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

# design network
model = Sequential()
model.add(LSTM(neuronNum[kk], activation='sigmoid',
               input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')
model.summary()

# fit network
history = model.fit(train_X, train_y, epochs=50, batch_size=72,
                    validation_split=0.2, verbose=2, shuffle=False)

# plot history
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='validation')
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 5, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 5, testX.shape[1]))
validX = numpy.reshape(validX, (validX.shape[0], 5, validX.shape[1]))

#############################################
## Define the model
#############################################
# create and fit the LSTM network
model = Sequential()
# model.add(LSTM(4, input_shape=(1, look_back), return_sequences=True))
# model.add(LSTM(4, input_shape=(1, look_back), return_sequences=True))
# model.add(LSTM(16, input_shape=(1, look_back), return_sequences=True))
# model.add(LSTM(64, input_shape=(1, look_back), return_sequences=False))
model.add(LSTM(4, input_shape=(1, look_back), return_sequences=True))
model.add(LSTM(16, input_shape=(1, look_back), return_sequences=True))
model.add(LSTM(64, input_shape=(1, look_back), return_sequences=False))
model.add(Dropout(rate=0.3))
# model.add(Dense(3))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

# Early stopping callback
PATIENCE = 40
early_stopping = EarlyStopping(monitor='loss', min_delta=0, patience=PATIENCE,
                               verbose=0, mode='auto')
callbacks = [early_stopping]
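# Sketch (assumption, not part of the original snippet): passing the early-stopping
# callback defined above into training. trainY/validY are hypothetical target arrays
# matching trainX/validX, and the epoch/batch values are illustrative only.
history = model.fit(trainX, trainY, epochs=500, batch_size=32,
                    validation_data=(validX, validY),
                    callbacks=callbacks, verbose=2)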
def stacked_bi_lstm(batch, epoch):
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info(r"running %s" % ''.join(sys.argv))

    logging.info('loading data...')
    # pickle_file = os.path.join('pickle', 'wassa_origin_tweet_glove.pickle3')
    pickle_file = os.path.join('pickle', 'parse_staford_no_letters_only_no_stopword_noreplace_v3.pickle')
    revs, W, word_idx_map, vocab, maxlen = pickle.load(open(pickle_file, 'rb'))
    logging.info('data loaded!')

    X_train, X_trial, y_train, y_trial, lex_train, lex_trial = make_idx_data(revs, word_idx_map, maxlen=maxlen)

    n_train_sample = X_train.shape[0]
    logging.info("n_train_sample [n_train_sample]: %d" % n_train_sample)
    n_trial_sample = X_trial.shape[0]
    logging.info("n_trial_sample [n_trial_sample]: %d" % n_trial_sample)
    len_sentence = X_train.shape[1]  # 200
    logging.info("len_sentence [len_sentence]: %d" % len_sentence)
    max_features = W.shape[0]
    logging.info("num of word vector [max_features]: %d" % max_features)
    num_features = W.shape[1]  # 400
    logging.info("dimension of word vector [num_features]: %d" % num_features)

    sequence = Input(shape=(maxlen,), dtype='int32')
    lex_input = Input(shape=(43,), dtype='float32')
    embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
                         mask_zero=True, weights=[W], trainable=False)(sequence)
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False)(sequence)
    embedded = Dropout(0.25)(embedded)

    # stacked bidirectional LSTM
    hidden = Bidirectional(LSTM(hidden_dim // 2, recurrent_dropout=0.25, return_sequences=True))(embedded)
    hidden = Bidirectional(LSTM(hidden_dim // 2, recurrent_dropout=0.25))(hidden)

    dense = Concatenate(axis=-1)([hidden, lex_input])
    dense = Dense(256, activation='relu')(dense)
    # dropout = Dropout(0.25)(dense)
    # dense = Dense(128, activation='relu')(dropout)
    # dense = Dense(64, activation='relu')(dense)
    output = Dense(6, activation='softmax')(dense)

    model = Model(inputs=[sequence, lex_input], outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc', f1])
    model.fit(x=[X_train, lex_train], y=y_train, batch_size=batch, epochs=epoch,
              validation_data=([X_trial, lex_trial], y_trial))
    y_pred = model.predict([X_trial, lex_trial], batch_size=batch_size)
    return y_pred
print("------------------------------------") print("Using parameters...") print("Vocabulary size: ", vocabulary_size) print("Number of labels: ", num_labels) print("Embeddings dimension: ", embeddings_dimension) print("Batch size: ", batch_size) print("Hidden layer size: ", hidden_layer) print("learning rate: ", learning_rate) print("Epochs: ", num_epoch) # Build the model print("------------------------------------") print('Build model...') model = Sequential() model.add(Embedding(vocabulary_size, embeddings_dimension, input_length=max_utterance_len, weights=[embedding_matrix], mask_zero=False)) model.add(LSTM(hidden_layer, dropout=0.3, return_sequences=True, kernel_initializer='random_uniform', recurrent_initializer='glorot_uniform')) model.add(TimeDistributed(Dense(hidden_layer, input_shape=(max_utterance_len, hidden_layer)))) model.add(GlobalMaxPooling1D()) model.add(Dense(num_labels, activation='softmax')) optimizer = RMSprop(lr=learning_rate, decay=0.001) model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) print(model.summary()) # Train the model print("------------------------------------") print("Training model...") start_time = time.time() print("Training started: " + datetime.datetime.now().strftime("%b %d %T") + " for", num_epoch, "epochs")
x_test = np.array(X_test)
ytest = np.array(y_test)
print(x_train.shape, ytrain.shape, x_test.shape, ytest.shape)

# Reshape the data
x_train = x_train.reshape((210, 70, 6))
ytrain = to_categorical(ytrain)
x_test = x_test.reshape((70, 70, 6))
ytest = to_categorical(ytest)
print(x_train.shape, ytrain.shape, x_test.shape, ytest.shape)

# Define the model
model = Sequential()
model.add(LSTM(units=70, return_sequences=True, input_shape=(x_train.shape[1], 6)))
model.add(Dropout(0.596602))
model.add(LSTM(units=50))
model.add(Dropout(0.2854))
model.add(Dense(ytrain.shape[1], activation='softmax'))

# Configure the model for training
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the network
epochs = 5
batch_size = 7
model.fit(x_train, ytrain, epochs=epochs, batch_size=batch_size)
def bilstm(X_train, X_test, Y_train, Y_test, wordembeddings):
    np.random.seed(1234)
    tf.random.set_seed(1234)
    random.seed(1234)

    max_length_sentence = X_train.str.split().str.len().max()
    tokenizer = Tokenizer(filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n\'', lower=True)
    tokenizer.fit_on_texts(X_train)
    word_index = tokenizer.word_index
    EMBEDDING_DIM = 300
    vocabulary_size = len(word_index) + 1
    print('Found %s unique tokens.' % len(word_index))

    sequences_train = tokenizer.texts_to_sequences(X_train)
    sequences_valid = tokenizer.texts_to_sequences(X_test)
    X_train = pad_sequences(sequences_train, maxlen=max_length_sentence)
    X_val = pad_sequences(sequences_valid, maxlen=X_train.shape[1])
    y_train = np.asarray(Y_train)
    y_val = np.asarray(Y_test)
    # print(word_index)
    '''
    print('Shape of data tensor:', X_train.shape)
    print('Shape of data tensor:', X_val.shape)
    print('Shape of data tensor:', y_train.shape)
    print('Shape of data tensor:', y_val.shape)
    print(X_train)
    print("*" * 100)
    print(X_val)
    print("*" * 100)
    print(y_train)
    print("*" * 100)
    print(y_val)
    '''

    embedding_matrix = np.zeros((vocabulary_size, EMBEDDING_DIM))
    for word, i in word_index.items():
        if word in wordembeddings.keys():
            embedding_vector = wordembeddings[word]
            if len(embedding_vector) == 0:  # if array is empty
                embedding_vector = wordembeddings[word.title()]
                if len(embedding_vector) == 0:
                    embedding_vector = wordembeddings[word.upper()]
                    if len(embedding_vector) == 0:
                        embedding_vector = np.array([round(np.random.rand(), 8) for i in range(0, 300)])
        else:
            # print("WORD NOT IN DICT", word)
            embedding_vector = np.array([round(np.random.rand(), 8) for i in range(0, 300)])
        if len(embedding_vector) != 0:
            embedding_matrix[i] = embedding_vector

    embedding_layer = Embedding(vocabulary_size, EMBEDDING_DIM, weights=[embedding_matrix],
                                trainable=False)  # Try with True

    inputs = Input(shape=(X_train.shape[1],))
    model = (Embedding(vocabulary_size, EMBEDDING_DIM, input_length=max_length_sentence,
                       weights=[embedding_matrix]))(inputs)
    model = (Bidirectional(LSTM(64)))(model)
    model = (Dense(900, activation='relu'))(model)
    model = (Dense(400, activation='relu'))(model)
    model = (Dense(250, activation='relu'))(model)
    model = (Dense(204, activation='softmax'))(model)
    model = Model(inputs=inputs, outputs=model)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()

    callbacks = [EarlyStopping(monitor='val_loss')]
    hist_adam = model.fit(X_train, y_train, batch_size=1000, epochs=200, verbose=1,
                          validation_data=(X_val, y_val),
                          callbacks=callbacks)  # NOTE: batch size 1000, epochs 200

    model.save(config.bilstm_prepocessed_dataset1_chai)  # NOTE: change this for other models

    y_pred = model.predict(X_val)
    print(y_pred)
    y_val_class = pd.DataFrame(y_val).idxmax(axis=1)
    print(y_val_class)
    y_val_class_argmax = np.argmax(y_val, axis=1)
    y_pred_class_argmax = np.argmax(y_pred, axis=1)
    y_pred_class = pd.DataFrame(y_pred).idxmax(axis=1)
    print(y_pred_class)
    print(classification_report(y_val_class, y_pred_class))

    plt.suptitle('Optimizer : Adam', fontsize=10)
    plt.ylabel('Loss', fontsize=16)
    plt.xlabel('Epoch', fontsize=14)
    plt.plot(hist_adam.history['loss'], color='b', label='Training Loss')
    plt.plot(hist_adam.history['val_loss'], color='r', label='Validation Loss')
    plt.legend(loc='upper right')
    plt.savefig('/home/ubuntu/asset_classification/results/bilstm_model_dataset1_preprocessed_chai.png')  # NOTE: change this for other models

    tf.keras.utils.plot_model(model, to_file=config.bilstm_architecture,
                              show_shapes=True)  # NOTE: change this for other models

    return (y_pred, y_val_class, y_pred_class, y_val_class_argmax, y_pred_class_argmax)
train_seqs.append(train_scaled_arr[startIdx:startIdx + 7])
train_seqs = np.stack(train_seqs)

test_seqs = []
nSegments = test_arr.shape[0] // 12  # each segment holds 4hr data (12 datapoints, 20min each)
for segment in range(nSegments):
    for t in range(6):
        startIdx = segment * 12 + t
        test_seqs.append(test_scaled_arr[startIdx:startIdx + 7])
test_seqs = np.stack(test_seqs)

# keras
# https://keras.io/getting-started/sequential-model-guide/#examples
input_dim = len(useful_cols)
output_dim = len(useful_cols)
timesteps = 6  # use 6 timesteps to predict the 7th

x_train, y_train = train_seqs[:, 0:-1], train_seqs[:, -1]
x_test, y_test = test_seqs[:, 0:-1], test_seqs[:, -1]

model = Sequential()
model.add(LSTM(16, input_shape=(timesteps, input_dim)))
model.add(Dense(output_dim))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=500, batch_size=64, validation_data=(x_test, y_test))
print(type(x_train))
# x_train /= 255
x_test /= 255
# print(x_train[0])
print(y_train[0])

# Convert the integer labels to one-hot encoded arrays so the loss is easy to compute
y_train = keras.utils.to_categorical(y_train, n_classes)
y_test = keras.utils.to_categorical(y_test, n_classes)
print(y_train[0])

# Build the model
model = Sequential()
model.add(LSTM(n_hidden, batch_input_shape=(None, n_step, n_input), unroll=True))
model.add(Dense(n_classes))  # this should match the output dimension
model.add(Activation('softmax'))

adam = Adam(lr=learning_rate)
model.summary()
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=training_iters, verbose=1,
          validation_data=(x_test, y_test))

score = model.evaluate(x_test, y_test, verbose=0)
print('LSTM test score:', score[0])
print('LSTM test accuracy:', score[1])
# # LSTM

# In[23]:

from keras.layers import LSTM

# In[24]:

model_lstm = Sequential()
model_lstm.add(Embedding(max_features, 32))
model_lstm.add(LSTM(32))
model_lstm.add(Dense(1, activation='sigmoid'))

# In[25]:

model_lstm.summary()

# In[26]:

model_lstm.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
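# In[ ]:

# Sketch (assumption, not part of the original notebook): training the compiled model
# on padded integer sequences; x_train/y_train and the epoch/split settings are
# illustrative placeholders, not names taken from the notebook.
history_lstm = model_lstm.fit(x_train, y_train, epochs=10, batch_size=128, validation_split=0.2)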
seq_in = raw_text[i:i + seq_length]
seq_out = raw_text[i + seq_length]
dataX.append([char_to_int[char] for char in seq_in])
dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

# define the LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(256))
model.add(Dropout(0.3))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

# define the checkpoint
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

# fit the model
# print("Epoch: %i" % i)
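# Sketch (assumption, not shown in the excerpt): the fit call that usually follows in
# this character-LSTM recipe, using the checkpoint callback defined above; the epoch
# and batch-size values are illustrative.
model.fit(X, y, epochs=20, batch_size=128, callbacks=callbacks_list)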
target = target

# normalization
scaler = MinMaxScaler(feature_range=(0, 1))
# data = scaler.fit_transform(data)
# target = scaler.fit_transform(target)

# data for training
# data_train = data[1:100].reshape(99,1,2)
# target_train = target.reshape(99,1,2)
data_train = data[1:100].reshape(99, 1, 2)
target_train = target  # .reshape(1,99,2)

model = Sequential()
model.add(LSTM(4, input_shape=(1, 2), activation='tanh'))
model.add(Dense(2))

# compile the model
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

# run the model (note: the original used the deprecated `nb_epoch` keyword; newer Keras expects `epochs`)
history = model.fit(data_train, target_train, epochs=10000, batch_size=10, validation_split=0.2)

# plotting results
import matplotlib.pyplot as plt
x = array([[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, 8], [7, 8, 9],
           [8, 9, 10], [9, 10, 11], [10, 11, 12], [20, 30, 40], [30, 40, 50], [40, 50, 60]])
y = array([4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 50, 60, 70])

print(x)
print("x.shape :", x.shape)  # (13, 3)
print("y.shape :", y.shape)  # (13, )

x = x.reshape((x.shape[0], x.shape[1], 1))
print(x)
print("x.shape :", x.shape)  # (13 samples, 3 columns, 1 feature per step)

# 2. Build the model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(3, 1)))  # input_shape = (samples ignored, 3 columns, 1 feature per step)
model.add(Dense(47))
model.add(Dense(44))
model.add(Dense(33))
model.add(Dense(30))
# model.add(Dense(25))
# model.add(Dense(20))
# model.add(Dense(17))
model.add(Dense(1))

# model.summary()

# 3. Run
model.compile(optimizer='adam', loss=['mse'])
model.fit(x, y, epochs=330, batch_size=1, verbose=0)  # verbose=0 shows only the result / verbose=1 is the default / verbose=2 prints a shorter per-epoch log
for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.
    encoder_input_data[i, t + 1:, input_token_index[' ']] = 1.
    for t, char in enumerate(target_text):
        # decoder_target_data is ahead of decoder_input_data by one timestep
        decoder_input_data[i, t, target_token_index[char]] = 1.
        if t > 0:
            # decoder_target_data will be ahead by one timestep
            # and will not include the start character.
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.
    decoder_input_data[i, t + 1:, target_token_index[' ']] = 1.
    decoder_target_data[i, t:, target_token_index[' ']] = 1.

# Define an input sequence and process it.
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None, num_decoder_tokens))
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
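# Sketch (assumption, not shown in the excerpt): turning the encoder-decoder graph above
# into a trainable model, following the usual Keras character-level seq2seq recipe; the
# batch_size/epochs values are illustrative.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=64, epochs=100, validation_split=0.2)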
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# Part 2 - Building the RNN

# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

# Initialising the RNN
regressor = Sequential()

# Adding the first LSTM layer and some Dropout regularisation
regressor.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
regressor.add(Dropout(0.2))

# Adding a second LSTM layer and some Dropout regularisation
regressor.add(LSTM(units=50, return_sequences=True))
regressor.add(Dropout(0.2))

# Adding a third LSTM layer and some Dropout regularisation
regressor.add(LSTM(units=50, return_sequences=True))
regressor.add(Dropout(0.2))

# Adding a fourth LSTM layer and some Dropout regularisation
regressor.add(LSTM(units=50))
regressor.add(Dropout(0.2))

# Adding the output layer
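# Sketch (assumption, continuing the usual form of this recipe): the output layer,
# compilation, and fit that normally follow; y_train and the epoch/batch settings are
# placeholders, not taken from this excerpt.
regressor.add(Dense(units=1))
regressor.compile(optimizer='adam', loss='mean_squared_error')
regressor.fit(X_train, y_train, epochs=100, batch_size=32)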
def pred_acc(y_true, y_pred):
    return K.mean(y_pred)

# f = open("data/Cancer Claim Data - X causes cancer.csv", "r")
# df = pd.read_csv(f)
# f.close()
# pu.db
# Y = list(df("X"))

input = Input(shape=(110, 100))
model = Bidirectional(LSTM(units=50, return_sequences=True,
                           recurrent_dropout=0.1))(input)  # variational biLSTM
model = TimeDistributed(Dense(50, activation="relu"))(model)  # a dense layer as suggested by neuralNer
crf = CRF(1)  # CRF layer
out = crf(model)  # output

model = Model(input, out)
model.compile(optimizer="rmsprop", loss=crf.loss_function)
model.summary()
# pu.db

Y = keras.utils.to_categorical(Y, num_classes=110)
Y = Y.reshape((Y.shape[0], Y.shape[1], 1))
all_train = all[:int(0.8 * all.shape[0]), ...]
embedding_vector = embeddings_index.get(word[0])
if embedding_vector is not None:
    # words not found in embedding index will be all-zeros.
    embedding_matrix[i] = embedding_vector
i += 1

# *******************************************************************
# Keras model of the chatbot:
# *******************************************************************
ad = Adam(lr=0.00005)

input_context = Input(shape=(maxlen_input,), dtype='int32')  # , name='input_context')
input_answer = Input(shape=(maxlen_input,), dtype='int32')  # , name='input_answer')
LSTM_encoder = LSTM(sentence_embedding_size, kernel_initializer='lecun_uniform')
LSTM_decoder = LSTM(sentence_embedding_size, kernel_initializer='lecun_uniform')
if os.path.isfile(weights_file):
    Shared_Embedding = Embedding(output_dim=word_embedding_size, input_dim=dictionary_size,
                                 input_length=maxlen_input)
else:
    Shared_Embedding = Embedding(output_dim=word_embedding_size, input_dim=dictionary_size,
                                 weights=[embedding_matrix], input_length=maxlen_input)
word_embedding_context = Shared_Embedding(input_context)
context_embedding = LSTM_encoder(word_embedding_context)

# LSTM_encoder_topic = LSTM(topic_embedding_size, kernel_initializer='lecun_uniform')
LSTM_encoder_topic = Dense(topic_embedding_size, activation="relu")
topic_embedding = LSTM_encoder_topic(context_embedding)

word_embedding_answer = Shared_Embedding(input_answer)
answer_embedding = LSTM_decoder(word_embedding_answer)
# test training split
random.seed(3)
rand_index = random.sample(range(len(Y)), len(Y))
X_shuffle = X[rand_index]
Y_shuffle = Y[rand_index]
split_index = int(len(Y) * 0.8)
X_train = X_shuffle[:split_index, :]
Y_train = Y_shuffle[:split_index, ]
X_test = X_shuffle[split_index:, :]
Y_test = Y_shuffle[split_index:, ]

# nn model CNN and RNN; this part can be changed to test different model configs
model = Sequential()
model.add(Embedding(2000, 64, input_length=30))
model.add(Conv1D(128, 3, activation='relu'))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(256, activation='relu'))
model.add(Dense(3, activation='sigmoid'))
model.compile(optimizer=RMSprop(), loss='categorical_crossentropy', metrics=['accuracy'])

train_history = model.fit(X_train, Y_train, batch_size=32, epochs=10,
                          callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)],
                          validation_data=(X_test, Y_test))

# example with RNN only
# xt = np.transpose(x_test, axes=(0,2,1))
# x_train = np.concatenate([x, x_train], axis=1)
# x_test = np.concatenate([xt, x_test], axis=1)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print(x_train.shape[1:])

y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(TimeDistributed(Dropout(0), input_shape=input_shape))
model.add(LSTM(128))
model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.50))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                    verbose=1, validation_data=(x_test, y_test))
print('----- test sequences', len(x_test))

# Pad/truncate every word-index sequence to the same length: extra words at the front or
# back are dropped, and sequences that are too short are padded with 0.
print('========== 2.Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('----- x_train shape:', x_train.shape)
print('----- x_test shape:', x_test.shape)

# Build the network model
print('========== 3.Build model...')
model = Sequential()
# input_dim=max_features is the vocabulary size; output_dim=128 is the word-vector dimension
model.add(Embedding(max_features, 128))  # maps positive-integer indices to fixed-size vectors, output (*, *, 128)
# units=128: the LSTM maps the word-vector dimension to 128
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

# Compile/train the network and evaluate performance on the test set
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train, batch_size=batch_size, epochs=5, validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('----- Test loss:', score)
def build_models(self, weights_path=None):
    # weights_path: if given, training resumes from these weights
    # inp_mix_fea_shape: (MaxLen(time), feature_dim)
    mix_fea_inp = Input(shape=(self.inp_fea_len, self.inp_fea_dim), name='input_mix_feature')
    # inp_mix_spec_shape: (MaxLen(time), spectrum_dim), fixed time_steps
    mix_spec_inp = Input(shape=(self.inp_fea_len, self.inp_spec_dim), name='input_mix_spectrum')
    # bg_mask_inp = Input(shape=(self.inp_fea_len, self.inp_spec_dim), name='input_bg_mask')
    # inp_target_spk_shape: (1)
    target_spk_inp = Input(shape=(self.inp_spk_len, ), name='input_target_spk')  # the numeric speaker id
    # inp_clean_fea_shape: (MaxLen(time), feature_dim), variable time_steps
    # clean_fea_inp = Input(shape=(None, self.inp_fea_dim), name='input_clean_feature')  # the target speaker's original speech; not needed at evaluation time
    clean_fea_inp = Input(shape=config.ImageSize, name='input_clean_feature')  # the single target image input

    mix_fea_layer = mix_fea_inp
    mix_spec_layer = mix_spec_inp
    target_spk_layer = target_spk_inp
    clean_fea_layer = clean_fea_inp

    # Images are not sequential, so the masking below is not needed:
    # if config.IS_LOG_SPECTRAL:
    #     clean_fea_layer = MaskingGt(mask_value=np.log(np.spacing(1) * 2))(clean_fea_inp)
    # else:
    #     clean_fea_layer = Masking(mask_value=0.)(clean_fea_inp)

    # The mixed-speech extraction part stays unchanged.
    # clean_fea_layer = clean_fea_inp
    # Two stacked bidirectional LSTM layers extract features of the mixed speech:
    # (None(batch), MaxLen(time), feature_dim) -> (None(batch), None(time), hidden_dim)
    for _layer in range(config.NUM_LAYERS):
        # stack LSTMs; HIDDEN_UNITS is the LSTM hidden/output size
        mix_fea_layer = \
            Bidirectional(LSTM(config.HIDDEN_UNITS, return_sequences=True),
                          merge_mode='concat')(mix_fea_layer)

    # A dense layer maps to the spectrogram (t, f) embedding dimension:
    # (None(batch), MaxLen(time), hidden_dim) -> (None(batch), MaxLen(time), spec_dim * embed_dim)
    mix_embedding_layer = TimeDistributed(
        Dense(self.inp_spec_dim * config.EMBEDDING_SIZE, activation='tanh'))(mix_fea_layer)
    # (None(batch), MaxLen(time), spec_dim * embed_dim) -> (None(batch), MaxLen(time), spec_dim, embed_dim)
    mix_embedding_layer = Reshape(
        (self.inp_fea_len, self.inp_spec_dim, config.EMBEDDING_SIZE))(mix_embedding_layer)

    # Extract features of the target speaker's clean input (all zeros at test time).
    # (Batch, imagesize[0], imagesize[1]) -> (Batch, imageEmbedding)
    # TODO: this block defines the network used to extract image features
    # spk_vector_layer_forImage = image_net(clear_fea_layer)
    # spk_vector_layer1 = Convolution2D(4, 5, 5, border_mode='valid', input_shape=(1, config.ImageSize[0], config.ImageSize[1]))(clean_fea_layer)
    clean_fea_layer = Reshape((1, config.ImageSize[0], config.ImageSize[1]))(clean_fea_layer)
    spk_vector_layer1 = Convolution2D(4, 5, 5, border_mode='valid')(clean_fea_layer)
    spk_vector_layer1 = Activation('relu')(spk_vector_layer1)
    spk_vector_layer1 = MaxPooling2D(pool_size=(2, 2))(spk_vector_layer1)
    spk_vector_layer2 = Convolution2D(8, 3, 3, border_mode='valid')(spk_vector_layer1)
    spk_vector_layer2 = Activation('relu')(spk_vector_layer2)
    spk_vector_layer2 = MaxPooling2D(pool_size=(2, 2))(spk_vector_layer2)
    spk_vector_layer3 = Convolution2D(16, 3, 3, border_mode='valid')(spk_vector_layer2)
    spk_vector_layer3 = Activation('relu')(spk_vector_layer3)
    spk_vector_layer3 = MaxPooling2D(pool_size=(2, 2))(spk_vector_layer3)
    spk_vector_flatten = Flatten()(spk_vector_layer3)
    spk_vector_layer_image = Dense(config.EMBEDDING_SIZE, init='normal')(spk_vector_flatten)

    # Load the speaker vector into the life-long memory unit and update it.
    # [(None(batch), 1), (None(batch), embed_dim)] -> (None(batch), spk_size, embed_dim)
    spk_life_long_memory_layer = SpkLifeLongMemory(self.spk_size, config.EMBEDDING_SIZE,
                                                   unk_spk=config.UNK_SPK,
                                                   name='SpkLifeLongMemory')(
        [target_spk_layer, spk_vector_layer_image])

    # Select the memory entries for the current batch.
    # (None(batch), embed_dim)
    spk_memory_layer = SelectSpkMemory(name='SelectSpkMemory')(
        [target_spk_layer, spk_life_long_memory_layer])

    # Fully connected layer
    # (None(batch), MaxLen(time), hidden_dim) -> (None(batch), MaxLen(time), spec_dim * embed_dim)
    # (None(batch), embed_dim) -> (None(batch), embed_dim)
    # memory_layer = Dense(config.EMBEDDING_SIZE, activation='tanh')(spk_memory_layer)

    # Attention (masking) computation
    # (None(batch), MaxLen(time), spec_dim)
    output_mask_layer = Attention(self.inp_fea_len, self.inp_spec_dim, config.EMBEDDING_SIZE,
                                  mode='align', name='Attention')([
        mix_embedding_layer,  # the 3-D mixed-speech embedding
        spk_memory_layer      # the target speaker's voiceprint taken from memory
        # , memory_layer
        # , bg_mask_layer
    ])

    # Apply the mask
    # (None(batch), MaxLen(time), spec_dim)
    output_clean = merge([output_mask_layer, mix_spec_layer], mode='mul',
                         name='target_clean_spectrum')

    # Note: a model may have multiple inputs and multiple outputs.
    auditory_model = Model(input=[mix_fea_inp, mix_spec_inp, target_spk_inp, clean_fea_inp],
                           output=[output_clean], name='auditory_model')
    # Expose the memory output, used to update the external life-long memory unit.
    spk_memory_model = Model(input=auditory_model.input,
                             output=auditory_model.get_layer('SelectSpkMemory').output,
                             name='spk_vec_model')

    # If a model was saved earlier, load the trained weights and continue from them.
    if weights_path:
        print 'Load the trained weights of ', weights_path
        self.log_file.write('Load the trained weights of %s\n' % weights_path)
        auditory_model.load_weights(weights_path)

    print 'Compiling...'
    time_start = time.time()
    # With categorical_crossentropy the output must be a probability in [0, 1], so only a
    # logistic or softmax output works; for non-probabilistic outputs use a loss such as MSE,
    # and switch to cross-entropy later if a probabilistic output is adopted.
    auditory_model.compile(loss='mse', optimizer=self.optimizer)
    time_end = time.time()
    print 'Compiled, cost time: %f second' % (time_end - time_start)
    return auditory_model, spk_memory_model
def train_model(embedding, mode):
    num_lstm = np.random.randint(175, 275)
    num_dense = np.random.randint(150, 250)
    hidden_size = 150
    rate_drop_lstm = 0.25 + np.random.rand() * 0.25
    rate_drop_dense = 0.25 + np.random.rand() * 0.25

    now = time.localtime()
    current_timestamp = "%04d-%02d-%02d_%02d:%02d:%02d" % (
        now.tm_year, now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec)
    STAMP = '{:s}_{:d}_{:d}_{:.2f}_{:.2f}'.format(embedding, num_lstm, num_dense,
                                                  rate_drop_lstm, rate_drop_dense)

    ########################################
    ## define the model structure
    ########################################
    embedding_layer = Embedding(nb_words, EMBEDDING_DIM, weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH, trainable=False)
    lstm_layer = LSTM(num_lstm, dropout=rate_drop_lstm, recurrent_dropout=rate_drop_lstm)

    # cnn
    window_size = [1, 2, 3, 4]
    num_filters = 20
    conv_layers = []
    pool_layers = []
    for w in window_size:
        conv_layer = Conv2D(filters=num_filters, kernel_size=(w, EMBEDDING_DIM),
                            strides=(1, 1), padding='valid', activation='relu')
        pool_layer = MaxPool2D(pool_size=(MAX_SEQUENCE_LENGTH - w + 1, 1),
                               strides=(1, 1), padding='valid')
        conv_layers.append(conv_layer)
        pool_layers.append(pool_layer)

    sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences_1 = embedding_layer(sequence_1_input)
    x1 = lstm_layer(embedded_sequences_1)
    # cnn
    embedded_sequences_1 = Reshape((MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, 1))(embedded_sequences_1)
    xs = []
    for i in range(len(conv_layers)):
        x = conv_layers[i](embedded_sequences_1)
        x = pool_layers[i](x)
        x = Reshape((num_filters, ))(x)
        xs.append(x)

    sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences_2 = embedding_layer(sequence_2_input)
    y1 = lstm_layer(embedded_sequences_2)
    # cnn
    embedded_sequences_2 = Reshape((MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, 1))(embedded_sequences_2)
    ys = []
    for i in range(len(conv_layers)):
        y = conv_layers[i](embedded_sequences_2)
        y = pool_layers[i](y)
        y = Reshape((num_filters, ))(y)
        ys.append(y)

    x2 = concatenate(xs)
    y2 = concatenate(ys)

    extra_features = Input(shape=(extra_feature_num, ), dtype='float32')

    # merged = concatenate([x1, y1])
    add_distance1 = add([x1, y1])
    mul_distance1 = multiply([x1, y1])
    input0 = concatenate([add_distance1, mul_distance1])
    input1 = Dropout(rate_drop_dense)(input0)
    input1 = BatchNormalization()(input1)
    output1 = Dense(hidden_size, activation='relu')(input1)

    add_distance2 = add([x2, y2])
    mul_distance2 = multiply([x2, y2])
    input2 = concatenate([add_distance2, mul_distance2])
    input3 = Dropout(rate_drop_dense)(input2)
    input3 = BatchNormalization()(input3)
    output3 = Dense(hidden_size, activation='relu')(input3)

    # merged = concatenate([add_distance1, mul_distance1, add_distance2, mul_distance2])
    merged = Dropout(rate_drop_dense)(input0)
    merged = BatchNormalization()(merged)
    merged = concatenate([merged, extra_features])
    merged = Dense(num_dense, activation=act)(merged)
    merged = Dropout(rate_drop_dense)(merged)
    merged = BatchNormalization()(merged)

    merged1 = Dropout(rate_drop_dense)(input2)
    merged1 = BatchNormalization()(merged1)
    merged1 = concatenate([merged1, extra_features])
    merged1 = Dense(num_dense, activation=act)(merged1)
    merged1 = Dropout(rate_drop_dense)(merged1)
    merged1 = BatchNormalization()(merged1)

    output2 = Dense(hidden_size, activation='relu')(merged)
    output4 = Dense(hidden_size, activation='relu')(merged1)

    merged = add([output1, output2])
    merged = BatchNormalization()(merged)
    merged1 = add([output3, output4])
    merged1 = BatchNormalization()(merged1)

    final_merged = concatenate([merged, merged1])
    final_merged = Dropout(rate_drop_dense)(final_merged)
    preds = Dense(1, activation='sigmoid')(final_merged)

    ########################################
    ## add class weight
    ########################################
    if re_weight:
        class_weight = {0: class0_weight, 1: class1_weight}
    else:
        class_weight = None

    ########################################
    ## train the model
    ########################################
    model = Model(inputs=[sequence_1_input, sequence_2_input, extra_features], outputs=preds)
    model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['acc'])
    # model.summary()
    print(STAMP)

    if mode == 'train':
        #
        # TRAIN SCRIPT
        #
        early_stopping = EarlyStopping(monitor='val_loss', patience=10)
        bst_model_path = H5_PATH + STAMP + '.h5'
        model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True,
                                           save_weights_only=True)

        hist = model.fit([data_1_train, data_2_train, train_features], labels_train,
                         validation_data=([data_1_val, data_2_val, val_features],
                                          labels_val, weight_val),
                         epochs=10, batch_size=2048, shuffle=True,
                         class_weight=class_weight,
                         callbacks=[early_stopping, model_checkpoint])

        model.load_weights(bst_model_path)
        bst_val_score = min(hist.history['val_loss'])
        print('best val score: {}'.format(bst_val_score))

        ########################################
        ## make the submission
        ########################################
        print('Start making the submission before fine-tuning')
        preds = model.predict([test_data_1, test_data_2, test_features], batch_size=1024, verbose=1)
        preds += model.predict([test_data_2, test_data_1, test_features], batch_size=1024, verbose=1)
        preds /= 2

        submission = pd.DataFrame({'test_id': test_ids, 'is_duplicate': preds.ravel()})
        file_name = '%.4f_' % bst_val_score + STAMP + '_' + current_timestamp + '.csv'
        submission.to_csv(RESULT_PATH + file_name, index=False)

    elif mode == 'test':
        model.load_weights(H5_PATH + 'glove_191_207_0.48_0.27.h5')
        #
        # TEST SCRIPT
        #
        while (1):
            print('')
            print('##')
            print('## TEST START')
            print('##')
            tsentence1 = input("Enter Sentence 1: ")
            tsentence2 = input("Enter Sentence 2: ")
            tlist1 = []
            tlist2 = []
            tlist1.append(text_to_word_list(tsentence1))
            tlist2.append(text_to_word_list(tsentence2))
            tsequences_1 = tokenizer.texts_to_sequences(tlist1)
            tsequences_2 = tokenizer.texts_to_sequences(tlist2)
            tdata_1 = pad_sequences(tsequences_1, maxlen=MAX_SEQUENCE_LENGTH)
            tdata_2 = pad_sequences(tsequences_2, maxlen=MAX_SEQUENCE_LENGTH)
            tfeatures = get_extra_features(tdata_1.tolist(), tdata_2.tolist(), idf_dict,
                                           embedding_matrix, question_freq, inter_dict)
            print('Calculating...')
            preds = model.predict([tdata_1, tdata_2, tfeatures], batch_size=1, verbose=1)
            preds += model.predict([tdata_2, tdata_1, tfeatures], batch_size=1, verbose=1)
            preds /= 2
            result = pd.DataFrame({'similarity': preds.ravel()})
            print(result)
            print('')
embedded = Lambda(binarize, output_shape=binarize_outshape)(in_sentence)

block2 = char_block(embedded, (128, 256), filter_length=(5, 5), subsample=(1, 1), pool_length=(2, 2))
block3 = char_block(embedded, (192, 320), filter_length=(7, 5), subsample=(1, 1), pool_length=(2, 2))

sent_encode = concatenate([block2, block3], axis=-1)
# sent_encode = Dropout(0.2)(sent_encode)

encoder = Model(inputs=in_sentence, outputs=sent_encode)
encoder.summary()

encoded = TimeDistributed(encoder)(document)

lstm_h = 92
lstm_layer = LSTM(lstm_h, return_sequences=True, dropout=0.1, recurrent_dropout=0.1,
                  implementation=0)(encoded)
lstm_layer2 = LSTM(lstm_h, return_sequences=False, dropout=0.1, recurrent_dropout=0.1,
                   implementation=0)(lstm_layer)

# output = Dropout(0.2)(bi_lstm)
output = Dense(1, activation='sigmoid')(lstm_layer2)
model = Model(outputs=output, inputs=document)
model.summary()

if checkpoint:
    model.load_weights(checkpoint)

file_name = os.path.basename(sys.argv[0]).split('.')[0]
check_cb = keras.callbacks.ModelCheckpoint('checkpoints/' + file_name + '.{epoch:02d}-{val_loss:.2f}.hdf5',
print(X_train_t)
print(y_train.shape)
print(y_train)

## LSTM Model
import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import keras.backend as K
from keras.callbacks import EarlyStopping

# Build the model
K.clear_session()
model = Sequential()  # Sequential model
model.add(LSTM(20, input_shape=(12, 1)))  # (timesteps, features)
model.add(Dense(1))  # output = 1
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)

# model fitting
for i in range(100):
    model.fit(X_train_t, y_train, epochs=100, batch_size=30, verbose=1, callbacks=[early_stop])
    model.reset_states()

print(X_test_t)
y_pred = model.predict(X_test_t)
print(y_pred)
def build_part1_RNN(window_size):
    model = Sequential()
    model.add(LSTM(5, input_shape=(window_size, 1)))
    model.add(Dense(1))
    return model
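# Sketch (assumption, not part of the original function): a typical compile/fit usage of
# the regression RNN returned above; X_train/y_train and the window size are illustrative
# placeholders, not names taken from the source file.
model = build_part1_RNN(window_size=7)
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, epochs=100, batch_size=50, verbose=0)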
# Check the embedding dimensions
print("EMBEDDING DIM: ", embedDim)
print("\n")

# Create the model: a single Embedding layer, 1/2 LSTM layers, a Flatten layer (for
# multi-class classification), and a Dense layer with 1, 2, or 5 nodes depending on the
# number of classification categories.
print("Creating The Model...\n")
model = Sequential()
model.add(Embedding(len(wordIndex) + 1, embedDim, input_length=75, weights=[embedMatrix],
                    trainable=True))
# model.add(LSTM(75, dropout=0.2, recurrent_dropout=0.3, return_sequences=True))
model.add(LSTM(100, dropout=0.3, recurrent_dropout=0.2, return_sequences=True))

# 5-Sentiment
model.add(Flatten())
model.add(Dense(5, activation='softmax'))
# model.add(Dense(2, activation='softmax'))
# model.add(Dense(1, activation='sigmoid'))

# Print model summary
model.summary()

# Compiling the model
# Binary-Sentiment
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# 5-Sentiment
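# Sketch (assumption, continuing the "5-Sentiment" branch the comments above set up):
# the categorical compile that would pair with the 5-node softmax output.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])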
def build_part2_RNN(window_size, num_chars):
    model = Sequential()
    model.add(LSTM(200, input_shape=(window_size, num_chars)))
    model.add(Dense(num_chars))
    model.add(Activation('softmax'))
    return model
def stacked_bi_lstm_context():
    left_pickle_file = os.path.join('pickle', 'context_left.pickle')
    left_revs, left_W, left_word_idx_map, left_vocab, left_maxlen = pickle.load(open(left_pickle_file, 'rb'))
    left_X_train, left_X_test, left_X_dev, left_y_train, left_y_dev = make_idx_data(left_revs, left_word_idx_map, maxlen=left_maxlen)

    left_n_train_sample = left_X_train.shape[0]
    left_n_test_sample = left_X_test.shape[0]
    left_len_sentence = left_X_train.shape[1]  # 200
    left_max_features = left_W.shape[0]
    left_num_features = left_W.shape[1]  # 400

    # Keras model
    # this is the placeholder tensor for the input sequence
    left_sequence = Input(shape=(left_maxlen,), dtype='int32')
    left_embedded = Embedding(input_dim=left_max_features, output_dim=left_num_features,
                              input_length=left_maxlen, mask_zero=True, weights=[left_W],
                              trainable=False)(left_sequence)
    left_embedded = Dropout(0.25)(left_embedded)
    left_hidden = Bidirectional(LSTM(hidden_dim, recurrent_dropout=0.25, return_sequences=True))(left_embedded)
    left_hidden = Bidirectional(LSTM(hidden_dim, recurrent_dropout=0.25))(left_hidden)

    right_pickle_file = os.path.join('pickle', 'context_right.pickle')
    right_revs, right_W, right_word_idx_map, right_vocab, right_maxlen = pickle.load(open(right_pickle_file, 'rb'))
    right_X_train, right_X_test, right_X_dev, right_y_train, right_y_dev = make_idx_data(right_revs, right_word_idx_map, maxlen=right_maxlen)

    right_n_train_sample = right_X_train.shape[0]
    right_n_test_sample = right_X_test.shape[0]
    right_len_sentence = right_X_train.shape[1]  # 200
    right_max_features = right_W.shape[0]
    right_num_features = right_W.shape[1]  # 400

    # Keras model
    # this is the placeholder tensor for the input sequence
    right_sequence = Input(shape=(right_maxlen,), dtype='int32')
    right_embedded = Embedding(input_dim=right_max_features, output_dim=right_num_features,
                               input_length=right_maxlen, mask_zero=True, weights=[right_W],
                               trainable=False)(right_sequence)
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen, weights=[W], trainable=False)(sequence)
    right_embedded = Dropout(0.25)(right_embedded)
    right_hidden = Bidirectional(LSTM(hidden_dim, recurrent_dropout=0.25, return_sequences=True))(right_embedded)
    right_hidden = Bidirectional(LSTM(hidden_dim, recurrent_dropout=0.25))(right_hidden)

    x = Concatenate(axis=-1)([left_hidden, right_hidden])
    # x = Concatenate([left_flatten, right_flatten])
    dense = Dense(256, activation='relu')(x)
    dropout = Dropout(0.25)(dense)
    dense = Dense(256, activation='relu')(dropout)
    dense = Dense(256, activation='relu')(dense)
    output = Dense(6, activation='softmax')(dense)

    model = Model(inputs=[left_sequence, right_sequence], outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

    early_stopping = EarlyStopping(monitor='val_acc', patience=3, verbose=2)
    model.fit([left_X_train, right_X_train], left_y_train, validation_split=0.1,
              batch_size=batch_size, epochs=nb_epoch, verbose=1, callbacks=[early_stopping])

    # The model has two inputs, so prediction must pass the left and right test sets as a
    # list; np.hstack would merge them into one array and break the input structure.
    y_pred = model.predict([left_X_test, right_X_test], batch_size=batch_size)
    return y_pred
# split into train and test sets
train_size = idtrain_end  # int(len(dataset) * 0.09)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]

# reshape into X=t and Y=t+1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='logcosh', optimizer='Adagrad')  # logcosh + Adagrad, or logcosh + rmsprop
model.fit(trainX, trainY, epochs=niterations, batch_size=1, verbose=2)

# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# multiclass
# y_train = np_utils.to_categorical(y_train)
# y_test = np_utils.to_categorical(y_test)
# y_test_21 = np_utils.to_categorical(y_test_21)

model = Sequential()
model.add(GRU(80, input_shape=(x_train.shape[1], x_train.shape[2]), return_sequences=True))
model.add(Dropout(0.1))
model.add(Dense(80, activation='relu'))
model.add(Dropout(0.1))
model.add(LSTM(80, return_sequences=True))
model.add(Dropout(0.1))
model.add(Dense(80, activation='relu'))
model.add(Dropout(0.1))
model.add(GRU(80, return_sequences=False))
model.add(Dropout(0.1))

# binary
model.add(Dense(1))
model.add(Activation('hard_sigmoid'))

# multiclass
# model.add(Dense(5))
# model.add(Activation('softmax'))
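# Sketch (assumption, not in the excerpt): the compile step that would pair with the
# binary head above; a categorical_crossentropy loss would be used instead if the
# commented-out multiclass head were enabled.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])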
from keras.models import Sequential
from keras.layers import Dense, LSTM, Activation

lr = 0.0001
batch_size = 32
input_shape = (None, None)  # placeholder for the original "(,)": set to (timesteps, features)
ROOT = 'data/'

model = Sequential()
model.add(LSTM(128, dropout=0.8, input_shape=input_shape, batch_size=batch_size))
# kick, snare, closed, open, clap
model.add(Dense(5, activation='softmax'))
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
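# Sketch (assumption, not from the original file): the training call that would follow,
# with x_train/y_train as hypothetical (samples, timesteps, features) and one-hot label
# arrays loaded from under ROOT; the epoch/split values are illustrative.
model.fit(x_train, y_train, batch_size=batch_size, epochs=20, validation_split=0.1)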