def main():
    # get and process data
    data = utils.DateData(2000)
    print("Chinese time order: yy/mm/dd ", data.date_cn[:3], "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
    print("vocabularies: ", data.vocab)
    print("x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
          "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]), data.y[0]))

    model = Transformer(MODEL_DIM, MAX_LEN, N_LAYER, N_HEAD, data.num_word, DROP_RATE)

    # training
    t0 = time.time()
    for t in range(1000):
        bx, by, seq_len = data.sample(64)
        bx, by = utils.pad_zero(bx, max_len=MAX_LEN), utils.pad_zero(by, max_len=MAX_LEN + 1)
        loss = model.step(bx, by)
        if t % 50 == 0:
            logits = model(bx[:1], by[:1, :-1], False)[0].numpy()
            t1 = time.time()
            print(
                "step: ", t,
                "| time: %.2f" % (t1 - t0),
                "| loss: %.4f" % loss.numpy(),
                "| target: ", "".join([data.i2v[i] for i in by[0, 1:] if i != data.v2i["<PAD>"]]),
                "| inference: ", "".join([data.i2v[i] for i in np.argmax(logits, axis=1) if i != data.v2i["<PAD>"]]),
            )
            t0 = t1

    os.makedirs("./visual_helper/transformer", exist_ok=True)
    model.save_weights("./visual_helper/transformer/model.ckpt")
    with open("./visual_helper/transformer_v2i_i2v.pkl", "wb") as f:
        pickle.dump({"v2i": data.v2i, "i2v": data.i2v}, f)

    # prediction
    src_seq = "02-11-30"
    print("src: ", src_seq, "\nprediction: ", model.translate(src_seq, data.v2i, data.i2v))

    # save attention matrix for visualization
    _ = model(bx[:1], by[:1, :-1], training=False)
    data = {
        "src": [data.i2v[i] for i in data.x[0]],
        "tgt": [data.i2v[i] for i in data.y[0]],
        "attentions": model.attentions}
    with open("./visual_helper/transformer_attention_matrix.pkl", "wb") as f:
        pickle.dump(data, f)
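# A minimal sketch of the padding helper assumed above (hypothetical; the actual
# utils.pad_zero may differ): it right-pads each index sequence with the <PAD>
# id 0 so every row has length max_len, which is what the fixed-length model input expects.
import numpy as np

def pad_zero(seqs, max_len):
    # allocate an all-<PAD> (0) matrix, then copy each sequence into its row
    padded = np.zeros((len(seqs), max_len), dtype=np.int32)
    for i, seq in enumerate(seqs):
        padded[i, :len(seq)] = seq[:max_len]
    return padded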
def train():
    # get and process data
    data = utils.DateData(4000)
    print("Chinese time order: yy/mm/dd ", data.date_cn[:3], "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
    print("vocabularies: ", data.vocab)
    print("x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
          "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]), data.y[0]))

    model = Seq2Seq(
        data.num_word, data.num_word, emb_dim=16, units=32,
        max_pred_len=11, start_token=data.start_token, end_token=data.end_token)

    # training
    for t in range(1500):
        bx, by, decoder_len = data.sample(32)
        loss = model.step(bx, by, decoder_len)
        if t % 70 == 0:
            target = data.idx2str(by[0, 1:-1])
            pred = model.inference(bx[0:1])
            res = data.idx2str(pred[0])
            src = data.idx2str(bx[0])
            print(
                "t: ", t,
                "| loss: %.3f" % loss,
                "| input: ", src,
                "| target: ", target,
                "| inference: ", res,
            )
def train():
    # get and process data
    data = utils.DateData(DataSize)
    train_x, train_y, train_l = data.sample(DataSize)
    print("Chinese time order: yy/mm/dd ", data.date_cn[:3], "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
    print("vocabularies: ", data.vocab)
    print("x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
          "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]), data.y[0]))

    model = Seq2Seq(
        data.num_word, data.num_word, emb_dim=16, units=32,
        max_pred_len=11, start_token=data.start_token, end_token=data.end_token)
    model.compile(
        optimizer=keras.optimizers.Adam(Learn_rate),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.sparse_categorical_accuracy])
    model.fit(
        (train_x, train_y), train_y,
        callbacks=[myTensorboard(data)],
        batch_size=Batch_size, epochs=Epochs)
def train():
    # get and process data
    data = utils.DateData(2000)
    print("Chinese time order: yy/mm/dd ", data.date_cn[:3], "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
    print("vocabularies: ", data.vocab)
    print("x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
          "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]), data.y[0]))

    model = Seq2Seq(
        data.num_word, data.num_word, emb_dim=12, units=14, attention_layer_size=16,
        max_pred_len=11, start_token=data.start_token, end_token=data.end_token)

    # training
    for t in range(1000):
        bx, by, decoder_len = data.sample(64)
        loss = model.step(bx, by, decoder_len)
        if t % 70 == 0:
            target = data.idx2str(by[0, 1:-1])
            pred = model.inference(bx[0:1])
            res = data.idx2str(pred[0])
            src = data.idx2str(bx[0])
            print(
                "t: ", t,
                "| loss: %.5f" % loss,
                "| input: ", src,
                "| target: ", target,
                "| inference: ", res,
            )

    pkl_data = {
        "i2v": data.i2v, "x": data.x[:6], "y": data.y[:6],
        "align": model.inference(data.x[:6], return_align=True)}
    with open("./visual/tmp/attention_align.pkl", "wb") as f:
        pickle.dump(pkl_data, f)
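# A small sketch of how the pickled alignment data above could be read back for
# visualization. It assumes only the keys written above ("i2v", "x", "y", "align");
# the exact structure of "align" depends on model.inference, so it is only inspected,
# not relied upon.
import pickle

def load_attention_align(path="./visual/tmp/attention_align.pkl"):
    with open(path, "rb") as f:
        d = pickle.load(f)
    # recover human-readable source/target strings from the saved index sequences
    src = ["".join(d["i2v"][i] for i in seq) for seq in d["x"]]
    tgt = ["".join(d["i2v"][i] for i in seq) for seq in d["y"]]
    print("src:", src)
    print("tgt:", tgt)
    print("align entries:", len(d["align"]))
    return d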
def train():
    dataset = utils.DateData(4000)
    print("Chinese time order: yy/mm/dd ", dataset.date_cn[:3], "\nEnglish time order: dd/M/yyyy", dataset.date_en[:3])
    print("Vocabularies: ", dataset.vocab)
    print(f"x index sample: \n{dataset.idx2str(dataset.x[0])}\n{dataset.x[0]}",
          f"\ny index sample: \n{dataset.idx2str(dataset.y[0])}\n{dataset.y[0]}")

    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    model = Seq2Seq(
        dataset.num_word, dataset.num_word, emb_dim=16, units=32,
        max_pred_len=11, start_token=dataset.start_token, end_token=dataset.end_token)

    for i in range(100):
        for batch_idx, batch in enumerate(loader):
            bx, by, decoder_len = batch
            loss = model.step(bx, by)
            if batch_idx % 70 == 0:
                target = dataset.idx2str(by[0, 1:-1].data.numpy())
                pred = model.inference(bx[0:1])
                res = dataset.idx2str(pred[0].data.numpy())
                src = dataset.idx2str(bx[0].data.numpy())
                print(
                    "Epoch: ", i,
                    "| t: ", batch_idx,
                    "| loss: %.3f" % loss,
                    "| input: ", src,
                    "| target: ", target,
                    "| inference: ", res,
                )
def train(emb_dim=32, n_layer=3, n_head=4):
    dataset = utils.DateData(4000)
    print("Chinese time order: yy/mm/dd ", dataset.date_cn[:3], "\nEnglish time order: dd/M/yyyy", dataset.date_en[:3])
    print("Vocabularies: ", dataset.vocab)
    print(f"x index sample: \n{dataset.idx2str(dataset.x[0])}\n{dataset.x[0]}",
          f"\ny index sample: \n{dataset.idx2str(dataset.y[0])}\n{dataset.y[0]}")

    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    model = Transformer(
        n_vocab=dataset.num_word, max_len=MAX_LEN, n_layer=n_layer,
        emb_dim=emb_dim, n_head=n_head, drop_rate=0.1, padding_idx=0)
    if torch.cuda.is_available():
        print("GPU train available")
        device = torch.device("cuda")
        model = model.cuda()
    else:
        device = torch.device("cpu")
        model = model.cpu()

    for i in range(100):
        for batch_idx, batch in enumerate(loader):
            bx, by, decoder_len = batch
            # pad to the fixed model length and move the batch to the training device
            bx = torch.from_numpy(utils.pad_zero(bx, max_len=MAX_LEN)).type(torch.LongTensor).to(device)
            by = torch.from_numpy(utils.pad_zero(by, MAX_LEN + 1)).type(torch.LongTensor).to(device)
            loss, logits = model.step(bx, by)
            if batch_idx % 50 == 0:
                target = dataset.idx2str(by[0, 1:-1].cpu().data.numpy())
                pred = model.translate(bx[0:1], dataset.v2i, dataset.i2v)
                res = dataset.idx2str(pred[0].cpu().data.numpy())
                src = dataset.idx2str(bx[0].cpu().data.numpy())
                print(
                    "Epoch: ", i,
                    "| t: ", batch_idx,
                    "| loss: %.3f" % loss,
                    "| input: ", src,
                    "| target: ", target,
                    "| inference: ", res,
                )
def train():
    # get and process data
    data = utils.DateData(4000)  # holds the vocabulary plus Chinese/English dates, both as strings and as index sequences
    print("Chinese time order: yy/mm/dd ", data.date_cn[:3], "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
    print("vocabularies: ", data.vocab)
    print("x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
          "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]), data.y[0]))

    model = Seq2Seq(
        data.num_word, data.num_word, emb_dim=16, units=32,
        max_pred_len=11, start_token=data.start_token, end_token=data.end_token)

    # train
    for t in range(1500):
        bx, by, decoder_len = data.sample(32)
        loss = model.step(bx, by, decoder_len)
        if t % 70 == 0:
            target = data.idx2str(by[0, 1:-1])
            # translate a single sequence at a time, so a single index-based target sequence is returned
            pred = model.inference(bx[0:1])
            # convert the predicted index sequence back to a string
            res = data.idx2str(pred[0])
            # convert the source index sequence back to a string
            src = data.idx2str(bx[0])
            print(
                "step:", t,
                "| loss:", loss,
                "| input:", src,
                "| target:", target,
                "| inference:", res,
            )
model.save_weights("./visual/models/transformer/model.ckpt") with open("./visual/tmp/transformer_v2i_i2v.pkl", "wb") as f: pickle.dump({"v2i": data.v2i, "i2v": data.i2v}, f) def export_attention(model, data): with open("./visual/tmp/transformer_v2i_i2v.pkl", "rb") as f: dic = pickle.load(f) model.load_weights("./visual/models/transformer/model.ckpt") bx, by, seq_len = data.sample(32) model.translate(bx, dic["v2i"], dic["i2v"]) attn_data = { "src": [[data.i2v[i] for i in bx[j]] for j in range(len(bx))], "tgt": [[data.i2v[i] for i in by[j]] for j in range(len(by))], "attentions": model.attentions } with open("./visual/tmp/transformer_attention_matrix.pkl", "wb") as f: pickle.dump(attn_data, f) if __name__ == "__main__": d = utils.DateData(4000) print("Chinese time order: yy/mm/dd ", d.date_cn[:3], "\nEnglish time order: dd/M/yyyy ", d.date_en[:3]) print("vocabularies: ", d.vocab) print("x index sample: \n{}\n{}".format(d.idx2str(d.x[0]), d.x[0]), "\ny index sample: \n{}\n{}".format(d.idx2str(d.y[0]), d.y[0])) m = Transformer(MODEL_DIM, MAX_LEN, N_LAYER, N_HEAD, d.num_word, DROP_RATE) train(m, d, step=600) export_attention(m, d)
            *res, '\n',
        )
        super(myTensorboard, self).on_epoch_end(epoch, logs)


def load_data(data, size):
    x, y, seq_len = data.sample(size)
    x = utils.pad_zero(x, MAX_LEN)
    y = utils.pad_zero(y, MAX_LEN + 1)
    return (x, y[:, :-1]), y[:, 1:]


def train(model: Transformer, data):
    x, y = load_data(data, DATA_SIZE)
    tb = myTensorboard(data)
    model.compile(keras.optimizers.Adam(LEARN_RATE), loss=Loss())
    model.fit(x, y, batch_size=BATCH_SIZE, epochs=EPOCHS, callbacks=[tb])


if __name__ == "__main__":
    d = utils.DateData(DATA_SIZE)
    print("Chinese time order: yy/mm/dd ", d.date_cn[:3], "\nEnglish time order: dd/M/yyyy ", d.date_en[:3])
    print("vocabularies: ", d.vocab)
    print("x index sample: \n{}\n{}".format(d.idx2str(d.x[0]), d.x[0]),
          "\ny index sample: \n{}\n{}".format(d.idx2str(d.y[0]), d.y[0]))

    m = Transformer(MODEL_DIM, MAX_LEN, N_LAYER, N_LAYER, N_HEAD, d.num_word)
    m.build([[None, 12], [None, 12]])
    train(m, d)
        o, _, _ = self.decoder_train(dec_emb_in, s, sequence_length=seq_len)
        logits = o.rnn_output
        return logits

    def step(self, x, y, seq_len):
        with tf.GradientTape() as tape:
            logits = self.train_logits(x, y, seq_len)
            dec_out = y[:, 1:]  # ignore <GO>
            _loss = self.cross_entropy(dec_out, logits)
        grads = tape.gradient(_loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return _loss.numpy()


# get and process data
data = utils.DateData(2000)
print("Chinese time order: yy/mm/dd ", data.date_cn[:3], "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
print("vocabularies: ", data.vocab)
print("x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
      "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]), data.y[0]))

model = Seq2Seq(
    data.num_word, data.num_word, emb_dim=16, units=32,
    max_pred_len=11, start_token=data.start_token, end_token=data.end_token)

# training
for t in range(1500):
    bx, by, decoder_len = data.sample(32)
    loss = model.step(bx, by, decoder_len)
    if t % 30 == 0:
        target = data.idx2str(by[0, 1:-1])
print( "t: ", t, "| loss: %.3f" % loss, "| input: ", src, "| target: ", target, "| inference: ", res, ) return LOSS if __name__ == '__main__': data = utils.DateData(4000) # 输入数据 m_time = {} epochs = 1001 import time start = time.time() # RNN: seq2seq model = Seq2Seq(data.num_word, data.num_word, emb_dim=16, units=32, max_pred_len=11, start_token=data.start_token, end_token=data.end_token) LOSS = train(data, model, epochs) end = time.time()