def transformer(use_feed):
    assert not use_feed, "transformer doesn't support feed yet"
    return transformer_model.transformer(
        ModelHyperParams.src_vocab_size + 1,
        ModelHyperParams.trg_vocab_size + 1, ModelHyperParams.max_length + 1,
        ModelHyperParams.n_layer, ModelHyperParams.n_head,
        ModelHyperParams.d_key, ModelHyperParams.d_value,
        ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
        ModelHyperParams.dropout, ModelHyperParams.src_pad_idx,
        ModelHyperParams.trg_pad_idx, ModelHyperParams.pos_pad_idx)
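The ModelHyperParams class is not shown in this snippet; in the PaddlePaddle Transformer benchmark it is a plain class of constants. A minimal illustrative sketch (values are the usual base-model defaults, not taken from this repo; the `+ 1` in the call above reserves an extra index for the `<pad>` token):

class ModelHyperParams(object):
    # vocabulary sizes and maximum sequence length (illustrative defaults)
    src_vocab_size = 10000
    trg_vocab_size = 10000
    max_length = 50
    # architecture of the base Transformer
    n_layer = 6
    n_head = 8
    d_key = 64
    d_value = 64
    d_model = 512
    d_inner_hid = 1024
    dropout = 0.1
    # indices used for the <pad> token and padded positions
    src_pad_idx = src_vocab_size
    trg_pad_idx = trg_vocab_size
    pos_pad_idx = 0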
Example no. 3
def load_model(hparams, ckpt_f):
    model = transformer(hparams)
    latest_ckpt = tf.train.latest_checkpoint(os.path.dirname(ckpt_f))
    if latest_ckpt:
        # recover the epoch number from a checkpoint name like 'cp-0010.ckpt'
        start_epoch = int(latest_ckpt.split('-')[1].split('.')[0])
        model.load_weights(latest_ckpt)
        print("model resumed from: {}, start at epoch: {}".format(latest_ckpt, start_epoch))
    else:
        print("skipping resume since weights are not there; training from scratch")
    return model
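The epoch parsing above assumes checkpoint filenames of the form prefix-{epoch}.ext. A brief usage sketch under that assumption (the template path is hypothetical; hparams comes from the caller):

ckpt_f = 'checkpoints/cp-{epoch:04d}.ckpt'  # hypothetical checkpoint template
model = load_model(hparams, ckpt_f)         # resumes from the latest epoch if one exists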
Example no. 4
def main(hparams):
    dataset, tokenizer = get_dataset(hparams)
    for data in dataset.take(2):
        print(data)

    model = transformer(hparams)
    # ckpt_path is assumed to be defined at module level elsewhere in this file
    latest_ckpt = tf.train.latest_checkpoint(os.path.dirname(ckpt_path))
    start_epoch = 0
    if latest_ckpt and hparams.train_from_last:
        start_epoch = int(latest_ckpt.split('-')[1].split('.')[0])
        model.load_weights(latest_ckpt)
        print("Model resumed from: {}, start at epoch: {}".format(
            latest_ckpt, start_epoch))
    else:
        print("Passing resume since weights not there, training from scratch")

    optimizer = tf.keras.optimizers.Adam(CustomSchedule(hparams),
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)

    def loss_function(y_true, y_pred):
        y_true = tf.reshape(y_true, shape=(-1, hparams.max_length - 1))
        loss = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction='none')(y_true, y_pred)

        # zero out the loss at padding positions (token id 0)
        mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)
        loss = tf.multiply(loss, mask)
        return tf.reduce_mean(loss)

    def accuracy(y_true, y_pred):
        y_true = tf.reshape(y_true, shape=(-1, hparams.max_length - 1))
        return tf.metrics.SparseCategoricalAccuracy()(y_true, y_pred)

    callbacks = [
        # 'period' (save every N epochs) is deprecated in newer tf.keras
        # versions in favor of 'save_freq'
        tf.keras.callbacks.ModelCheckpoint(ckpt_path,
                                           save_weights_only=True,
                                           verbose=1,
                                           period=10)
    ]
    model.compile(optimizer, loss=loss_function, metrics=[accuracy])
    try:
        history = model.fit(dataset,
                            initial_epoch=start_epoch,
                            epochs=hparams.epochs,
                            callbacks=callbacks)
        evaluate(hparams, model, tokenizer)
    except KeyboardInterrupt:
        print("Training interrupted.")
    # save final weights whether training completed or was interrupted
    model.save_weights(ckpt_path.format(epoch=0))
    print("Keras model saved.")
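One subtlety in loss_function above: tf.reduce_mean averages over every position, padded or not, so batches with more padding yield a smaller per-token loss. A common variant (an alternative, not what this snippet does) normalizes by the number of real tokens:

import tensorflow as tf

def masked_loss(y_true, y_pred):
    loss = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')(y_true, y_pred)
    mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)
    # average only over non-padding positions
    return tf.reduce_sum(loss * mask) / tf.maximum(tf.reduce_sum(mask), 1.0)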
Example no. 5
    # (snippet begins mid-function; the assignment below is reconstructed
    # from context: decode the predicted token ids back into text)
    predicted_sentence = tokenizer.decode(
        [i for i in prediction if i < tokenizer.vocab_size])
    return predicted_sentence.lstrip()


print("Importing trained model...")

learning_rate = CustomSchedule(D_MODEL)

optimizer = tf.keras.optimizers.Adam(learning_rate,
                                     beta_1=0.9,
                                     beta_2=0.98,
                                     epsilon=1e-9)
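CustomSchedule is defined elsewhere in these projects; a minimal sketch of the warmup learning-rate schedule from "Attention Is All You Need", as commonly implemented in TensorFlow (an assumption, not copied from this repo):

import tensorflow as tf

class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        # lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)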

model = transformer(vocab_size=VOCAB_SIZE,
                    num_layers=NUM_LAYERS,
                    units=UNITS,
                    d_model=D_MODEL,
                    num_heads=NUM_HEADS,
                    dropout=DROPOUT)

model.compile(optimizer=optimizer, loss=loss_function, metrics=[accuracy])
model.load_weights('weights_CD.h5')

dataSet = pd.read_csv('AppleData.csv')  # assumes pandas was imported as pd
questions = list()
answers = list()
generatedAnswers = list()
print("Generating candidate responses...")
for i in range(100):
    questions.append(dataSet.values[i][3])  # column 3: question text
    answers.append(dataSet.values[i][5])    # column 5: reference answer
    cleared_sentence = preprocess_sentence(dataSet.values[i][3])
    # (the rest of the loop, which generates a response from
    # cleared_sentence, is truncated in this snippet)
Example no. 6
from transformer_model import transformer
from preprocessing import data
import torch

X_train, y_train, label_number = data()

model = transformer(label_number)
model.load_state_dict(torch.load('saved_model'))

print(model.predict(X_train[:3]))
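If the weights were saved on a GPU and are loaded on a CPU-only machine, torch.load needs a map_location. A hedged variant of the loading step above (also switching to eval mode to disable dropout for inference):

state = torch.load('saved_model', map_location=torch.device('cpu'))
model.load_state_dict(state)
model.eval()  # disable dropout before predicting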
Example no. 7
from transformer_model import transformer
from preprocessing import data, data_dev
import torch

X_train, y_train, label_number, label_dic = data()
X_dev, y_dev = data_dev()

# Train on a small subset of the data to see what happens
model = transformer(label_number + 1)  # labels start from 0, so add 1 for the class count
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

last_loss = model.train_with_batch(X_train[:5],
                                   y_train[:5],
                                   X_dev[:8],
                                   y_dev[:8],
                                   nepoch=300)

torch.save(model.state_dict(), 'saved_model')
print('model saved!!!')