def test_AttentionSeq2Seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))
    models = []
    models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                                output_length=output_length,
                                input_shape=(input_length, input_dim))]
    models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                                output_length=output_length,
                                input_shape=(input_length, input_dim), depth=2)]
    models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                                output_length=output_length,
                                input_shape=(input_length, input_dim), depth=3)]
    # att = AttentionSeq2Seq(output_dim=tsp_output_dim, hidden_dim=tsp_hidden_dim,
    #                        output_length=tsp_output_length,
    #                        input_shape=(tsp_input_length, tsp_input_dim))
    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        print(model.summary())
        model.fit(x, y, epochs=1)
def test_AttentionSeq2Seq():
    print("test seq2seq-attention")
    x = np.random.random((batch, max_encoder_length, input_dim))
    y = np.random.random((batch, max_decoder_length, output_dim))
    models = []
    models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                                output_length=max_decoder_length,
                                input_shape=(max_encoder_length, input_dim))]
    models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                                output_length=max_decoder_length,
                                input_shape=(max_encoder_length, input_dim), depth=2)]
    models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                                output_length=max_decoder_length,
                                input_shape=(max_encoder_length, input_dim), depth=3)]
    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1)
def test_attention_seq2seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))
    models = []
    models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                                output_length=output_length,
                                input_shape=(input_length, input_dim))]
    models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                                output_length=output_length,
                                input_shape=(input_length, input_dim), depth=2)]
    models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                                output_length=output_length,
                                input_shape=(input_length, input_dim), depth=3)]
    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=epoch_num)
def run():
    data_process = DataProcess(use_word2cut=False)

    input_length = data_process.enc_input_length
    output_length = data_process.dec_output_length
    enc_embedding_length = data_process.enc_embedding_length
    dec_embedding_length = data_process.dec_embedding_length

    model = AttentionSeq2Seq(output_dim=dec_embedding_length,
                             hidden_dim=data_process.hidden_dim,
                             output_length=output_length,
                             input_shape=(input_length, enc_embedding_length),
                             batch_size=1,
                             depth=data_process.layer_shape)
    model.compile(loss='mse', optimizer='rmsprop')
    model.load_weights("model/seq2seq_model_weights.h5")

    plot_model(model, to_file='model/seq2seq_model_structure.png',
               show_shapes=True, show_layer_names=True)

    # "Clear water mirrors the tender willows, peach blossoms reflect the spring colors"
    text = u"碧水照嫩柳,桃花映春色"  # u"这围巾要火!"  # u"你愿意嫁给我吗?"
    enc_padding_ids = data_to_padding_ids(text)
    enc_embedding = data_to_embedding(enc_padding_ids)
    prediction_words = predict_one_text(model, enc_embedding)
    print(prediction_words)

    print_score(model, enc_embedding)
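# predict_one_text() is defined elsewhere in this project. Below is only a minimal
# sketch of what such a helper might look like, assuming the decoder word2vec model
# (dec_vec_model) used during training is available and that model.predict() returns
# one embedding vector per decoder timestep. The name predict_one_text_sketch and
# the extra dec_vec_model argument are assumptions for illustration, not the real code.
import numpy as np


def predict_one_text_sketch(model, enc_embedding, dec_vec_model):
    # enc_embedding has shape (input_length, enc_embedding_length) for one example
    prediction = model.predict(np.array([enc_embedding]))[0]
    words = []
    for step_vector in prediction:
        # map each predicted embedding back to its nearest decoder vocabulary word
        words.append(dec_vec_model.wv.similar_by_vector(step_vector, topn=1)[0][0])
    return u"".join(words)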
def test_AttentionSeq2Seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))
    models = []
    models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                                output_length=output_length,
                                input_shape=(input_length, input_dim))]
    # models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
    #                             output_length=output_length,
    #                             input_shape=(input_length, input_dim), depth=2)]
    # models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
    #                             output_length=output_length,
    #                             input_shape=(input_length, input_dim), depth=3)]
    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        plot_model(model, to_file='model1.png', show_shapes=True)
def train(x_train, y_train, epoch_num, hidden_dim, depth_num):
    input_length = x_train.shape[1]
    output_length = y_train.shape[1]
    input_dim = x_train.shape[2]
    output_dim = y_train.shape[2]

    model = AttentionSeq2Seq(input_shape=(input_length, input_dim),
                             depth=depth_num,
                             output_dim=output_dim,
                             hidden_dim=hidden_dim,
                             output_length=output_length)
    model.compile(loss='mse', optimizer="rmsprop")
    model.summary()
    model.fit(x_train, y_train, epochs=epoch_num)
    return model
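# A minimal, hypothetical invocation of train() above on random data, just to show
# the expected tensor shapes (samples, timesteps, features); the sizes and the
# hidden_dim/depth_num values are arbitrary and assume numpy and the seq2seq
# imports from the surrounding module are available.
import numpy as np

x_train = np.random.random((32, 7, 100))   # 32 samples, 7 encoder steps, 100 features
y_train = np.random.random((32, 9, 100))   # 32 samples, 9 decoder steps, 100 features
model = train(x_train, y_train, epoch_num=1, hidden_dim=64, depth_num=2)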
def get_basic_model():
    # Basic (non-attention) encoder-decoder: an LSTM encoder, a RepeatVector bridge,
    # stacked LSTM decoder layers and a per-timestep softmax over the output vocabulary.
    RNN = recurrent.LSTM
    model = Sequential()
    model.add(RNN(hidden_size, input_shape=(in_seq_length, in_vocab_size)))
    model.add(RepeatVector(out_seq_length))
    for _ in range(num_layers):
        model.add(RNN(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(out_vocab_size)))
    # model.add(Attention(recurrent.LSTM(out_vocab_size, input_dim=in_vocab_size,
    #                                    return_sequences=False, consume_less='mem')))
    model.add(Activation('softmax'))
    # Attention-based alternative (a complete model on its own; no layers are added to it):
    # model = AttentionSeq2Seq(input_dim=in_vocab_size, input_length=in_seq_length,
    #                          hidden_dim=10, output_length=out_seq_length,
    #                          output_dim=out_vocab_size, depth=2, bidirectional=False)
    return model
def create_nn_model(self):
    self.create_embedding()
    # input_seq = Input(shape=(MAX_SEQ_LEN,))
    # This converts the positive indices (integers) into a dense multi-dim representation.
    model = Sequential()
    model.add(Embedding(self.data_vocab_size + 1,
                        TOKEN_REPRESENTATION_SIZE,
                        weights=[self.embedding_matrix],
                        trainable=False,
                        input_shape=(MAX_SEQ_LEN, )))
    model.add(AttentionSeq2Seq(
        batch_input_shape=(None, MAX_SEQ_LEN, TOKEN_REPRESENTATION_SIZE),
        # hidden_dim=200, output_length=MAX_SEQ_LEN, output_dim=TOKEN_REPRESENTATION_SIZE,
        # depth=MAX_SEQ_LEN, peek=True))  # , return_sequences=True))
        hidden_dim=100,
        output_length=MAX_SEQ_LEN,
        output_dim=TOKEN_REPRESENTATION_SIZE,
        depth=1))
    model.add(TimeDistributed(Dense(self.data_vocab_size + 1, activation="softmax")))
    model.compile(optimizer="rmsprop", loss="categorical_crossentropy",
                  metrics=["accuracy"])
    self.autoencoder = model

    # embedding = Embedding(self.data_vocab_size + 1, TOKEN_REPRESENTATION_SIZE,
    #                       weights=[self.embedding_matrix], trainable=False)(input_seq)
    # encoder = LSTM(200)(embedding)
    # repeated = RepeatVector(MAX_SEQ_LEN)(encoder)
    # decoder = LSTM(200, return_sequences=True)(repeated)
    # time_dist = TimeDistributed(Dense(self.data_vocab_size + 1,
    #                                   activation="softmax"))(decoder)
    # seq_autoencoder = Model(input_seq, time_dist)
    # seq_autoencoder.compile(optimizer="rmsprop", loss="categorical_crossentropy",
    #                         metrics=["accuracy"])
    # seq_autoencoder.summary()
    # self.autoencoder = seq_autoencoder

    return self.autoencoder
def s2s_model(self):
    """Train the s2s model.

    Returns:
        A trained s2s model that can be used for prediction.

    Raises:
        IOError: An error occurred while training the s2s model.
    """
    model = AttentionSeq2Seq(input_dim=self.input_dim,
                             input_length=self.input_len,
                             hidden_dim=16,
                             output_length=self.output_len,
                             output_dim=self.output_dim,
                             depth=(1, 1),
                             stateful=False,
                             dropout=0.5)
    model.compile(loss='mape', optimizer='adam', metrics=['mse'])
    model.fit(self.train_X, self.train_Y, epochs=75, verbose=2, shuffle=True)
    return model
def create_critic_model(self):
    model = Sequential()
    model.add(Embedding(self.data_vocab_size + 1,
                        TOKEN_REPRESENTATION_SIZE,
                        weights=[self.get_embedding_matrix()],
                        trainable=False,
                        input_shape=(MAX_SEQ_LEN, )))
    model.add(AttentionSeq2Seq(batch_input_shape=(None, MAX_SEQ_LEN,
                                                  TOKEN_REPRESENTATION_SIZE),
                               hidden_dim=100,
                               output_length=MAX_SEQ_LEN,
                               output_dim=TOKEN_REPRESENTATION_SIZE,
                               depth=1))
    # The output is expected to be one scalar approximating the value of a state.
    model.add(TimeDistributed(Dense(1, activation="linear")))
    print("Critic model created")
    model.summary()
    model.compile(optimizer="rmsprop", loss="mse", metrics=["accuracy"])
    self.critic = model
def run():
    enc_vec_model = gensim.models.Word2Vec.load(r'model/encoder_vector.m')
    dec_vec_model = gensim.models.Word2Vec.load(r'model/decoder_vector.m')

    batch_size = 9
    epochs = 30

    data_process = DataProcess(use_word2cut=False)
    documents_length = data_process.get_documents_size(
        data_process.enc_ids_file, data_process.dec_ids_file)
    input_length = data_process.enc_input_length
    output_length = data_process.dec_output_length
    enc_embedding_length = data_process.enc_embedding_length
    dec_embedding_length = data_process.dec_embedding_length

    if batch_size > documents_length:
        print("ERROR---> too little corpus data, please add more")
        return None
    if data_process.hidden_dim < data_process.enc_input_length:
        print("ERROR---> too few hidden units, please add more")
        return None

    model = AttentionSeq2Seq(output_dim=dec_embedding_length,
                             hidden_dim=data_process.hidden_dim,
                             output_length=output_length,
                             input_shape=(input_length, enc_embedding_length),
                             batch_size=batch_size,
                             depth=data_process.layer_shape)
    # keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
    model.compile(loss='mse', optimizer='rmsprop')

    model.fit_generator(generator=generate_batch(batch_size=batch_size,
                                                 encoder_word2vec_model=enc_vec_model,
                                                 decoder_word2vec_model=dec_vec_model,
                                                 encoder_file_path=data_process.enc_ids_padding_file,
                                                 decoder_file_path=data_process.dec_ids_padding_file,
                                                 embedding_shape=(enc_embedding_length, dec_embedding_length)),
                        steps_per_epoch=int(documents_length / batch_size),
                        epochs=epochs, verbose=1, workers=1)

    model.save_weights("model/seq2seq_model_weights.h5", overwrite=True)
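# generate_batch() is defined elsewhere; fit_generator() only requires a generator
# that yields (inputs, targets) batches indefinitely. A minimal sketch under that
# assumption follows; the function name, parameters and the zero-filled arrays are
# placeholders, not the project's real batching code, which reads the padded id
# files and looks up the word2vec embeddings.
def generate_batch_sketch(batch_size, input_length, output_length,
                          enc_embedding_length, dec_embedding_length):
    import numpy as np
    while True:
        # in the real generator these arrays are filled from the padded id files
        # and the encoder/decoder word2vec models instead of zeros
        x = np.zeros((batch_size, input_length, enc_embedding_length))
        y = np.zeros((batch_size, output_length, dec_embedding_length))
        yield x, y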
def build_model(training=True):
    data_process = DataProcess(use_word2cut=False)

    embedding_matrix = get_encoder_embedding()
    vocab_size, embedding_size = embedding_matrix.shape
    embedding_layer = Embedding(vocab_size,
                                embedding_size,
                                weights=[embedding_matrix],
                                input_length=data_process.enc_input_length,
                                trainable=training,
                                name='encoder_embedding')

    enc_normalization = BatchNormalization(epsilon=data_process.epsilon)

    seq2seq = AttentionSeq2Seq(bidirectional=False,
                               output_dim=data_process.dec_embedding_length,
                               hidden_dim=data_process.hidden_dim,
                               output_length=data_process.dec_output_length,
                               input_shape=(data_process.enc_input_length,
                                            data_process.enc_embedding_length),
                               depth=data_process.layer_shape)

    model = Sequential()
    model.add(embedding_layer)
    model.add(enc_normalization)
    model.add(seq2seq)

    # from keras.optimizers import SGD
    # sgd = SGD(lr=0.001, decay=0, clipvalue=0.0)
    # from keras.optimizers import Adam
    # adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-04)
    # adam / rmsprop / sgd
    model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

    return model
def __init__(self, eval_corpus, eval_corpus_vectors_path, result, model_path, model_config):
    super().__init__()
    self.eval_corpus = eval_corpus
    self.eval_corpus_vectors_path = eval_corpus_vectors_path
    self.result = result
    self.model_path = model_path
    self.model_config = model_config

    self.model = Sequential()
    self.model.add(AttentionSeq2Seq(
        output_dim=model_config.get("output_dim"),
        hidden_dim=model_config.get("hidden_dim"),
        output_length=model_config.get("output_length"),
        input_shape=(model_config.get("input_length"),
                     model_config.get("input_dim")),
    ))
    self.model.add(TimeDistributed(Dense(model_config.get("output_dim"))))
    self.model.add(Activation("softmax"))
    self.model.load_weights(model_path)
if not os.path.exists(directory):
    os.makedirs(directory)

checkpoint = ModelCheckpoint(model_weights_path, monitor='val_acc', verbose=1,
                             save_best_only=True, save_weights_only=True, mode='max')
callbacks_list = [checkpoint]

model = Sequential()
model.add(AttentionSeq2Seq(output_dim=output_dim,
                           hidden_dim=hidden_dim,
                           output_length=output_length,
                           input_shape=(X_train[0].shape[0], X_train[0].shape[1]),
                           dropout=drop_out_ratio,
                           depth=depth))
model.add(TimeDistributed(Dense(output_dim)))
model.add(Activation('softmax'))
model.summary()

optimizer = Adam()
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

hist = model.fit(X_train, y_train_flat,
                 epochs=epoch_num,
                 batch_size=batch_size,
                 validation_data=(X_val, y_val_flat),
                 callbacks=callbacks_list)
        AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                         output_length=output_length,
                         input_shape=(input_length, input_dim), depth=2)
    ]
    models += [
        AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim,
                         output_length=output_length,
                         input_shape=(input_length, input_dim), depth=3)
    ]
    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1)


# test_SimpleSeq2Seq()
# test_Seq2Seq()
# test_AttentionSeq2Seq()

from seq2seq.models import AttentionSeq2Seq

model = AttentionSeq2Seq(input_dim=5, input_length=7, hidden_dim=10,
                         output_length=8, output_dim=20, depth=4)
model.compile(loss='mse', optimizer='rmsprop')
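# The minimal model above can be smoke-tested on random data; the array shapes below
# simply match its input_length/input_dim (7, 5) and output_length/output_dim (8, 20),
# and the sample count of 64 is arbitrary.
import numpy as np

x = np.random.random((64, 7, 5))
y = np.random.random((64, 8, 20))
model.fit(x, y, epochs=1)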
def create_actor_model(self):
    # This converts the positive indices (integers) into a dense multi-dim
    # representation.
    def evaluation_output_shape(input_shapes):
        input_shape = input_shapes[0]
        return (input_shape[0], input_shape[1])

    self.create_critic_model()

    ##### Sequential model with Attention #####
    model = Sequential()
    model.add(Embedding(self.data_vocab_size + 1,
                        TOKEN_REPRESENTATION_SIZE,
                        weights=[self.get_embedding_matrix()],
                        trainable=False,
                        input_shape=(MAX_SEQ_LEN, )))
    model.add(AttentionSeq2Seq(batch_input_shape=(None, MAX_SEQ_LEN,
                                                  TOKEN_REPRESENTATION_SIZE),
                               hidden_dim=100,
                               output_length=MAX_SEQ_LEN,
                               output_dim=TOKEN_REPRESENTATION_SIZE,
                               depth=1))
    model.add(TimeDistributed(Dense(self.data_vocab_size + 1, activation="softmax")))
    model.summary()
    self.actor = model

    ##### Functional model without Attention #####
    # input = Input(shape=(MAX_SEQ_LEN,))
    # embedding = Embedding(self.data_vocab_size + 1,
    #                       TOKEN_REPRESENTATION_SIZE,
    #                       weights=[self.get_embedding_matrix()], trainable=False)(input)
    # encoder = LSTM(100)(embedding)
    # repeat = RepeatVector(MAX_SEQ_LEN)(encoder)
    # decoder = LSTM(TOKEN_REPRESENTATION_SIZE, return_sequences=True)(repeat)
    # prediction = TimeDistributed(Dense(self.data_vocab_size + 1,
    #                                    activation="softmax"))(decoder)
    # self.actor = Model(input=input, output=prediction)

    optimizer = RMSprop()
    P = self.actor.output
    Q_pi = K.placeholder(shape=(MAX_SEQ_LEN, ))
    # loss = - evaluation
    # evaluation = Sum( prob * Q_pi )
    loss = -K.sum(K.dot(K.max(P), Q_pi))
    updates = optimizer.get_updates(self.actor.trainable_weights,
                                    self.actor.constraints, loss)
    self.evaluation_fn = K.function([self.actor.input, Q_pi], [loss],
                                    updates=updates)
    print("Actor model created")
# hard-code output dim and length
output_dim = 10
output_length = 9

# zero padding inputs
max_length = max_fea_seq_length(X_train)
print('max_length of x_train is {}'.format(max_length))
X_test = pad_sequences(X_test, maxlen=max_length, dtype=np.float64)

# load model
model = Sequential()
if 'att' in net_name:
    model.add(AttentionSeq2Seq(output_dim=output_dim,
                               hidden_dim=hidden_dim,
                               output_length=output_length,
                               input_shape=(X_test[0].shape[0], X_test[0].shape[1]),
                               dropout=0,
                               depth=depth))
elif 'seq' in net_name:
    model.add(Seq2Seq(output_dim=output_dim,
                      hidden_dim=hidden_dim,
                      output_length=output_length,
                      input_shape=(X_test[0].shape[0], X_test[0].shape[1]),
                      depth=depth,
                      peek=True))
model.add(TimeDistributed(Dense(10)))
model.add(Activation('softmax'))
model.load_weights(model_path)

optimizer = Adam()
else:
    from torch import FloatTensor
    training_task = SHAPESModuloTask()

param_list = []
for mod in training_task.module_dict.values():
    param_list.extend(list(mod.parameters()))
    if state_dict['GPU_SUPPORT']:
        mod.cuda()

attn_seq2seq = AttentionSeq2Seq(vocab_size_1=len(state_dict['VOCAB']),
                                vocab_size_2=len(state_dict['TOKENS']),
                                word_dim=args.word_dim,
                                hidden_dim=args.hidden_dim,
                                batch_size=state_dict['BATCH_SIZE'],
                                num_layers=args.num_layers,
                                use_dropout=args.use_dropout,
                                dropout=args.dropout,
                                use_cuda=state_dict['GPU_SUPPORT'])
param_list.extend(list(attn_seq2seq.parameters()))
print('Number of trainable parameters: {}'.format(
    sum(param.numel() for param in param_list if param.requires_grad)))

if state_dict['GPU_SUPPORT']:
    attn_seq2seq.cuda()

loss = BCEWithLogitsLoss()
plot_model(model, to_file='model2.png', show_shapes=True)
print()
'''

if __name__ == '__main__':
    dataset = joblib.load('../data/data_seq.pkl')
    word_dict = joblib.load('../data/query_dict_seq.pkl')
    encoder_input_data = dataset[0]
    decoder_input_data = dataset[1]
    decoder_target_data = dataset[2]
    input_token_index = word_dict[0]
    target_token_index = word_dict[1]

    model = AttentionSeq2Seq(output_dim=latent_dim, hidden_dim=latent_dim,
                             output_length=max_decoder_seq_length,
                             input_shape=(max_encoder_seq_length, num_encoder_tokens))
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    # plot_model(model, to_file='model4.png', show_shapes=True)
    model.fit(x=encoder_input_data, y=decoder_target_data,
              batch_size=batch_size, epochs=epochs)
    model.save('./model/seq2seq_attention_v2.h5')
args = parser.parse_args()

if not os.path.exists("tmp.pkl"):
    distributes = KeyedVectors.load_word2vec_format(args.dist_path)
    with open("tmp.pkl", "wb") as f:
        pickle.dump(distributes, f)
else:
    with open("tmp.pkl", "rb") as f:
        distributes = pickle.load(f)

middle_dim, depth_num, _, input_length, output_length, _ = [
    int(i) for i in args.input_model.split(".")[:-1]
]
dist_vec = get_distribute_vector(distributes, args.input_text)[:input_length, :]
input_dim = 100
output_dim = 100

model = AttentionSeq2Seq(input_shape=(input_length, input_dim),
                         depth=depth_num,
                         output_dim=output_dim,
                         hidden_dim=middle_dim,
                         output_length=output_length)
model.compile(loss='mse', optimizer="rmsprop")
model.load_weights(args.input_model)

y_pred = numpy.array([dist_vec])
for i in range(args.iter):
    y_pred = model.predict(y_pred[:, -input_length:, :])
    for j in y_pred[0]:
        print(distributes.similar_by_vector(j, topn=1)[0][0], end=" ")
def train_keras(sentence_array, operator_chain_array):
    from seq2seq import SimpleSeq2Seq, Seq2Seq, AttentionSeq2Seq

    sentence_shape = sentence_array.shape
    length = sentence_shape[0]
    in_seq_length = sentence_shape[1]
    in_vocab_size = sentence_shape[2]
    op_shape = operator_chain_array.shape
    out_seq_length = op_shape[1]
    out_vocab_size = op_shape[2]
    print(sentence_shape, op_shape)

    batch_size = 64
    hidden_size, embedding_dim = 10, 10
    memory_dim = 200
    num_layers = 2

    # model = SimpleSeq2Seq(input_dim=in_vocab_size,
    #                       hidden_dim=embedding_dim,
    #                       output_length=out_seq_length,
    #                       output_dim=out_vocab_size,
    #                       depth=3)

    # model = Seq2Seq(batch_input_shape=(batch_size, in_seq_length, in_vocab_size),
    #                 hidden_dim=embedding_dim,
    #                 output_length=out_seq_length,
    #                 output_dim=out_vocab_size,
    #                 depth=num_layers)
    # model.compile(loss='mse', optimizer='rmsprop')

    # model = Sequential()
    # model.add(LSTM(10, return_sequences=True), batch_input_shape=(4, 5, 10))
    # model.add(TFAttentionRNNWrapper(LSTM(10, return_sequences=True, consume_less='gpu')))
    # model.add(Dense(5))
    # model.add(Activation('softmax'))
    # model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

    # model = Sequential()
    # model.add(InputLayer(batch_input_shape=(nb_samples, timesteps, embedding_dim)))
    # model.add(wrappers.Bidirectional(recurrent.LSTM(embedding_dim, input_dim=embedding_dim, return_sequences=True)))
    # model.add(Attention(recurrent.LSTM(output_dim, input_dim=embedding_dim, return_sequences=True, consume_less='mem')))
    # model.add(core.Activation('relu'))
    # model.compile(optimizer='rmsprop', loss='mse')
    # model.fit(x, y, nb_epoch=1, batch_size=nb_samples)

    def get_basic_model():
        # Basic (non-attention) encoder-decoder: an LSTM encoder, a RepeatVector
        # bridge, stacked LSTM decoder layers and a per-timestep softmax.
        RNN = recurrent.LSTM
        model = Sequential()
        model.add(RNN(hidden_size, input_shape=(in_seq_length, in_vocab_size)))
        model.add(RepeatVector(out_seq_length))
        for _ in range(num_layers):
            model.add(RNN(hidden_size, return_sequences=True))
        model.add(TimeDistributed(Dense(out_vocab_size)))
        # model.add(Attention(recurrent.LSTM(out_vocab_size, input_dim=in_vocab_size,
        #                                    return_sequences=False, consume_less='mem')))
        model.add(Activation('softmax'))
        # Attention-based alternative (a complete model on its own):
        # model = AttentionSeq2Seq(input_dim=in_vocab_size, input_length=in_seq_length,
        #                          hidden_dim=10, output_length=out_seq_length,
        #                          output_dim=out_vocab_size, depth=2, bidirectional=False)
        return model

    # model = get_basic_model()
    model = AttentionSeq2Seq(input_dim=in_vocab_size,
                             input_length=in_seq_length,
                             hidden_dim=2,
                             output_length=out_seq_length,
                             output_dim=out_vocab_size,
                             depth=2,
                             bidirectional=False)
    model.compile(loss='mse', optimizer='rmsprop', metrics=['accuracy'])

    num_train = int(0.9 * length)
    X_train = sentence_array[:num_train]
    X_val = sentence_array[num_train:]
    # human_sent, human_op = read_data_file("humantests.txt")
    # human_sentence_array = preprocess_english(human_sent)
    # human_operator_chain_array = preprocess_operators(human_op)
    # X_val = human_sentence_array
    y_train = operator_chain_array[:num_train]
    y_val = operator_chain_array[num_train:]
    # y_val = human_operator_chain_array

    # model.fit(sentence_array,
    #           operator_chain_array,
    #           batch_size=batch_size,
    #           epochs=1,
    #           validation_data=(sentence_array, operator_chain_array))

    for iteration in range(1, 200):
        print()
        print('-' * 50)
        print('Iteration', iteration)
        a = model.fit(X_train, y_train,
                      batch_size=batch_size,
                      epochs=1,
                      validation_data=(X_val, y_val))

        # Select 10 samples from the validation set at random so we can visualize errors
        for i in range(10):
            ind = np.random.randint(0, len(X_val))
            rowX, rowy = X_val[np.array([ind])], y_val[np.array([ind])]
            preds = model.predict(rowX, verbose=0)
            # q = ctable.decode(rowX[0])
            # correct = ctable.decode(rowy[0])
            # guess = ctable.decode(preds[0], calc_argmax=False)
            # print('Q', q[::-1] if INVERT else q)
            # print('T', correct)
            # print('---')

    return model