def transformer(use_feed):
    assert not use_feed, "transformer doesn't support feed yet"
    return transformer_model.transformer(
        ModelHyperParams.src_vocab_size + 1,
        ModelHyperParams.trg_vocab_size + 1,
        ModelHyperParams.max_length + 1,
        ModelHyperParams.n_layer,
        ModelHyperParams.n_head,
        ModelHyperParams.d_key,
        ModelHyperParams.d_value,
        ModelHyperParams.d_model,
        ModelHyperParams.d_inner_hid,
        ModelHyperParams.dropout,
        ModelHyperParams.src_pad_idx,
        ModelHyperParams.trg_pad_idx,
        ModelHyperParams.pos_pad_idx)
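The wrapper above reads every hyperparameter from a ModelHyperParams namespace defined elsewhere in the project. For context, here is a minimal sketch of what such a container might look like; the field names come from the call site above, but every value is an illustrative assumption (roughly the base Transformer configuration), not the project's actual settings.

# Hypothetical sketch of the ModelHyperParams container consumed above.
# Field names match the call site; values are illustrative assumptions only.
class ModelHyperParams:
    src_vocab_size = 10000   # source vocabulary size
    trg_vocab_size = 10000   # target vocabulary size
    max_length = 256         # maximum sequence length
    n_layer = 6              # number of encoder/decoder layers
    n_head = 8               # number of attention heads
    d_key = 64               # per-head key dimension
    d_value = 64             # per-head value dimension
    d_model = 512            # model (embedding) dimension
    d_inner_hid = 2048       # feed-forward inner dimension
    dropout = 0.1            # dropout rate
    src_pad_idx = 0          # padding token id, source side
    trg_pad_idx = 0          # padding token id, target side
    pos_pad_idx = 0          # padding id for position embeddings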
def load_model(hparams, ckpt_f):
    model = transformer(hparams)
    latest_ckpt = tf.train.latest_checkpoint(os.path.dirname(ckpt_f))
    if latest_ckpt:
        # Checkpoint filenames embed the epoch, e.g. "ckpt-12.h5".
        start_epoch = int(latest_ckpt.split('-')[1].split('.')[0])
        model.load_weights(latest_ckpt)
        print("Model resumed from: {}, start at epoch: {}".format(latest_ckpt, start_epoch))
    else:
        print("Skipping resume since no weights were found; training from scratch")
    return model
def main(hparams):
    dataset, tokenizer = get_dataset(hparams)
    # Peek at a couple of batches to sanity-check the input pipeline.
    for data in dataset.take(2):
        print(data)

    model = transformer(hparams)
    latest_ckpt = tf.train.latest_checkpoint(os.path.dirname(ckpt_path))
    start_epoch = 0
    if latest_ckpt and hparams.train_from_last:
        start_epoch = int(latest_ckpt.split('-')[1].split('.')[0])
        model.load_weights(latest_ckpt)
        print("Model resumed from: {}, start at epoch: {}".format(
            latest_ckpt, start_epoch))
    else:
        print("Skipping resume since no weights were found; training from scratch")

    optimizer = tf.keras.optimizers.Adam(CustomSchedule(hparams),
                                         beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    def loss_function(y_true, y_pred):
        # Cross-entropy over the shifted targets, with padding positions
        # (token id 0) masked out of the loss.
        y_true = tf.reshape(y_true, shape=(-1, hparams.max_length - 1))
        loss = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction='none')(y_true, y_pred)
        mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)
        loss = tf.multiply(loss, mask)
        return tf.reduce_mean(loss)

    def accuracy(y_true, y_pred):
        y_true = tf.reshape(y_true, shape=(-1, hparams.max_length - 1))
        return tf.metrics.SparseCategoricalAccuracy()(y_true, y_pred)

    callbacks = [
        # Save weights every 10 epochs.
        tf.keras.callbacks.ModelCheckpoint(ckpt_path, save_weights_only=True,
                                           verbose=1, period=10)
    ]
    model.compile(optimizer, loss=loss_function, metrics=[accuracy])

    try:
        history = model.fit(dataset,
                            initial_epoch=start_epoch,
                            epochs=hparams.epochs,
                            callbacks=callbacks)
        evaluate(hparams, model, tokenizer)
    except KeyboardInterrupt:
        # Save a snapshot if training is interrupted manually.
        model.save_weights(ckpt_path.format(epoch=0))
        print("Keras model saved.")
    model.save_weights(ckpt_path.format(epoch=0))
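Both Keras scripts build their optimizer around a CustomSchedule whose definition is not shown (one passes hparams, the next passes D_MODEL). A minimal sketch, assuming it implements the warmup schedule from "Attention Is All You Need", lrate = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5), which is the usual meaning of this name in Keras Transformer examples; the sketch follows the d_model-argument form.

import tensorflow as tf

# Hedged sketch of CustomSchedule, assuming the standard Transformer
# learning-rate schedule: linear warmup followed by inverse-sqrt decay.
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)                 # decay: step^-0.5
        arg2 = step * (self.warmup_steps ** -1.5)  # warmup: linear in step
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)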
    # Tail of the decoding helper (its head is truncated in this excerpt):
    # drop token ids outside the subword vocabulary (e.g. the START/END
    # markers), then detokenize.
    predicted_sentence = tokenizer.decode(
        [i for i in prediction if i < tokenizer.vocab_size])
    return predicted_sentence.lstrip()

print("Importing trained model...")
learning_rate = CustomSchedule(D_MODEL)
optimizer = tf.keras.optimizers.Adam(learning_rate,
                                     beta_1=0.9, beta_2=0.98, epsilon=1e-9)
model = transformer(vocab_size=VOCAB_SIZE,
                    num_layers=NUM_LAYERS,
                    units=UNITS,
                    d_model=D_MODEL,
                    num_heads=NUM_HEADS,
                    dropout=DROPOUT)
model.compile(optimizer=optimizer, loss=loss_function, metrics=[accuracy])
model.load_weights('weights_CD.h5')

dataSet = pd.read_csv('AppleData.csv')
questions = list()
answers = list()
generatedAnswers = list()
print("Generating candidate responses...")
for i in range(100):
    # Column 3 is appended as the question, column 5 as the answer.
    questions.append(dataSet.values[i][3])
    answers.append(dataSet.values[i][5])
    cleared_sentence = preprocess_sentence(dataSet.values[i][3])
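The loop relies on a preprocess_sentence helper defined elsewhere. A minimal sketch of what such a cleaner commonly does in these chatbot examples: lowercase the text and pad punctuation with spaces so each mark becomes its own token. The exact rules in the original project may differ.

import re

# Hypothetical preprocess_sentence; the project's actual rules may differ.
def preprocess_sentence(sentence):
    sentence = sentence.lower().strip()
    # Put a space around punctuation so it becomes its own token.
    sentence = re.sub(r"([?.!,])", r" \1 ", sentence)
    # Collapse runs of whitespace.
    sentence = re.sub(r"\s+", " ", sentence)
    # Keep only letters and basic punctuation.
    sentence = re.sub(r"[^a-zA-Z?.!,]+", " ", sentence)
    return sentence.strip()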
from transformer_model import transformer
from preprocessing import data
import torch

X_train, y_train, label_number = data()

# Rebuild the model and restore the weights saved by the training script.
# Labels start at 0, so the class count is label_number + 1; this must
# match the size used at training time for load_state_dict to succeed.
model = transformer(label_number + 1)
model.load_state_dict(torch.load('saved_model'))
print(model.predict(X_train[:3]))
from transformer_model import transformer
from preprocessing import data, data_dev
import torch

X_train, y_train, label_number, label_dic = data()
X_dev, y_dev = data_dev()

# Train on a small subset of the data to see what happens.
# Labels start at 0, so the number of classes is label_number + 1.
model = transformer(label_number + 1)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

last_loss = model.train_with_batch(X_train[:5], y_train[:5],
                                   X_dev[:8], y_dev[:8], nepoch=300)

torch.save(model.state_dict(), 'saved_model')
print('model saved!!!')
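The custom train_with_batch method called above is not shown in either script. A hedged sketch of what such a method might look like, assuming the model is an nn.Module classifier whose forward returns logits; the loss, optimizer choice, and treating each slice as one batch are all illustrative assumptions, not the project's actual implementation.

import torch
import torch.nn as nn

# Hypothetical train_with_batch, for illustration only; the real method in
# transformer_model.py may differ. Assumes self(X) returns class logits.
def train_with_batch(self, X_train, y_train, X_dev, y_dev, nepoch=300):
    device = next(self.parameters()).device
    X_train = torch.as_tensor(X_train).to(device)
    y_train = torch.as_tensor(y_train).long().to(device)
    X_dev = torch.as_tensor(X_dev).to(device)
    y_dev = torch.as_tensor(y_dev).long().to(device)

    optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()

    last_loss = None
    for epoch in range(nepoch):
        self.train()
        optimizer.zero_grad()
        loss = criterion(self(X_train), y_train)
        loss.backward()
        optimizer.step()
        last_loss = loss.item()

        if epoch % 50 == 0:
            # Report held-out loss occasionally.
            self.eval()
            with torch.no_grad():
                dev_loss = criterion(self(X_dev), y_dev).item()
            print(f"epoch {epoch}: train {last_loss:.4f}, dev {dev_loss:.4f}")
    return last_loss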