Example #1
def main():

    params1 = {'m': 1000,
               'n_a': 32,
               'n_s': 64,
               'Tx': 30,  # Max length of input
               'Ty': 10   # o/p length "YYYY-MM-DD"
               }
    
    dataset, human_vocab, machine_vocab, inv_machine_vocab = nmt.load_dataset(params1['m'])
    X, Y, Xoh, Yoh = nmt.preprocess_data(dataset, human_vocab, machine_vocab, params1['Tx'], params1['Ty'])
    
    params2 = {'machine_vocab_size' : len(machine_vocab),
               'human_vocab_size' : len(human_vocab)}
    
    params = {**params1, **params2}
    
    hparams = {'lr' : 0.005, 
               'beta_1' : 0.9, 
               'beta_2' : 0.999, 
               'decay' : 0.01}
    
    vocab = {'human_vocab' : human_vocab,
             'machine_vocab' : machine_vocab,
             'inv_machine_vocab' : inv_machine_vocab}
    
    # Define shared layers as global variables
    global repeator, concatenator, densor1, densor2, activator, dotor, encoder
    
    encoder = Bidirectional(LSTM(params['n_a'], return_sequences=True))
    
    repeator = RepeatVector(params['Tx'])
    concatenator = Concatenate(axis=-1)
    densor1 = Dense(10, activation = "tanh")
    densor2 = Dense(1, activation = "relu")
    activator = Activation(nmt.softmax, name='attention_weights') # We are using a custom softmax(axis = 1) loaded from nmt_utils
    dotor = Dot(axes = 1)
    
    model, _ = myModel(Xoh, Yoh, **params, **hparams)
    model.load_weights('models/coursera_model.h5')
    
    return model, params, vocab
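For reference, the shared layers defined in this example are typically wired together into the one-step attention function sketched below (the standard pattern from the date-translation exercise; a is the stack of Bi-LSTM activations and s_prev the previous decoder state, both assumptions since myModel is not shown here):

def one_step_attention(a, s_prev):
    # Repeat s_prev to shape (m, Tx, n_s) so it can be concatenated with a.
    s_prev_rep = repeator(s_prev)
    # Concatenate along the feature axis: (m, Tx, 2*n_a + n_s).
    concat = concatenator([a, s_prev_rep])
    # Two small dense layers compute one "energy" per input timestep.
    e = densor1(concat)
    energies = densor2(e)
    # Custom softmax over the Tx axis turns the energies into attention weights.
    alphas = activator(energies)
    # Weighted sum of the encoder activations: context of shape (m, 1, 2*n_a).
    context = dotor([alphas, a])
    return context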
Example #2

parser = argparse.ArgumentParser(
    description='Either train or evaluate attn model for normalizing dates.')
parser.add_argument('-m',
                    '--mode',
                    type=str,
                    required=True,
                    choices=['train', 'eval'],
                    help="pick mode; either train or eval")
args = parser.parse_args()
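# Usage sketch (the script name below is hypothetical):
#   python normalize_dates.py --mode train
#   python normalize_dates.py --mode eval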

# We'll train the model on a dataset of 10000 human readable dates
# and their equivalent, standardized, machine readable dates.
nb_samples = 10000
dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(
    nb_samples)
print('Human vocab', human_vocab)
print('Machine vocab', machine_vocab)
print('Inverse machine vocab', inv_machine_vocab)

X, Y = zip(*dataset)
X, Y, _, _ = preprocess_data(dataset, human_vocab, machine_vocab,
                             seq_len_human, seq_len_machine)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

encoder_rnn = EncoderRNN(EMBEDDING_DIM_PRE_ATTN, HIDDEN_DIM_PRE_ATTN_LSTM,
                         len(human_vocab)).to(device)
attn_decoder_rnn = AttnDecoderRNN(HIDDEN_DIM_POST_ATTN_LSTM,
                                  len(machine_vocab)).to(device)
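A minimal continuation sketch (not part of the original snippet): the index arrays returned by preprocess_data would typically be moved to the selected device as integer tensors before being fed to the encoder/decoder.

# Assumption: X and Y are integer index arrays of shape (m, Tx) and (m, Ty).
X_t = torch.as_tensor(X, dtype=torch.long, device=device)
Y_t = torch.as_tensor(Y, dtype=torch.long, device=device)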
Example #3
def main():
    m = 10000
    dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m)

    Tx = 30
    Ty = 10
    X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx,
                                     Ty)

    if DO_TEST: test(human_vocab, X, Y, Xoh, Yoh, dataset)

    # Define shared layers once so they are reused at every decoder timestep
    repeator = RepeatVector(Tx)
    concatenator = Concatenate(axis=-1)
    densor = Dense(1, activation="relu")
    activator = Activation(
        softmax, name='attention_weights'
    )  # We are using a custom softmax(axis = 1) loaded in this notebook
    dotor = Dot(axes=1)

    def my_one_step_attention(
        a, s_prev
    ):  # binding the shared layers here lets us pass the function as an argument without recreating them
        return one_step_attention(a, s_prev, repeator, concatenator, densor,
                                  dotor, activator)

    n_a = 64
    n_s = 128
    post_activation_LSTM_cell = LSTM(n_s, return_state=True)
    output_layer = Dense(len(machine_vocab), activation=softmax)

    model = create_model(Tx, Ty, n_a, n_s, len(human_vocab),
                         my_one_step_attention, post_activation_LSTM_cell,
                         output_layer)

    model.summary()

    ### START CODE HERE ### (≈2 lines)
    opt = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, decay=0.01)
    model.compile(optimizer=opt,
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    ### END CODE HERE ###

    s0 = np.zeros((m, n_s))
    c0 = np.zeros((m, n_s))
    outputs = list(Yoh.swapaxes(0, 1))

    #model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)
    model.fit([Xoh, s0, c0], outputs, epochs=25, batch_size=256)

    EXAMPLES = [
        '3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007',
        'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001',
        '1 March 2001', '5 March 09', '5 April 09', '05 April 2009'
    ]
    for example in EXAMPLES:
        # below: notebook version, using Keras 2.0.7
        # source = string_to_int(example, Tx, human_vocab)
        # source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source))).swapaxes(0, 1)

        # below: local version, using Keras 2.1.3
        source = string_to_int(example, Tx, human_vocab)
        source = np.array(
            list(
                map(lambda x: to_categorical(x, num_classes=len(human_vocab)),
                    source)))
        # Unlike the notebook version, the .swapaxes(0, 1) call is dropped here;
        # instead, add an extra dimension as axis 0.
        source = np.expand_dims(source, axis=0)

        prediction = model.predict([source, s0, c0])
        prediction = np.argmax(prediction, axis=-1)
        output = [inv_machine_vocab[int(i)] for i in prediction]

        print("source:", example)
        print("output:", ''.join(output))
Example #4
def main(save=True, save_dir=os.getcwd(), load_model=None):
    """
    Arguments:
    save : save model after training to directory
    save_dir: save trained models to this directory if not provided will save in current working directory
    
    return: trained/ loaded model, parameters of models, vocbulary
    """
    global model, params, vocab
    params1 = {
        'm': 1000,
        'n_a': 32,
        'n_s': 64,
        'Tx': 30,  # Max length of input
        'Ty': 10  # o/p length "YYYY-MM-DD"
    }

    dataset, human_vocab, machine_vocab, inv_machine_vocab = nmt_utils.load_dataset(
        params1['m'])
    X, Y, Xoh, Yoh = nmt_utils.preprocess_data(dataset, human_vocab,
                                               machine_vocab, params1['Tx'],
                                               params1['Ty'])

    params2 = {
        'machine_vocab_size': len(machine_vocab),
        'human_vocab_size': len(human_vocab)
    }

    params = {**params1, **params2}

    hparams = {'lr': 0.005, 'beta_1': 0.9, 'beta_2': 0.999, 'decay': 0.01}

    vocab = {
        'human_vocab': human_vocab,
        'machine_vocab': machine_vocab,
        'inv_machine_vocab': inv_machine_vocab
    }

    # Define shared layers as global variables
    global repeator, concatenator, densor1, densor2, activator, dotor

    repeator = RepeatVector(params['Tx'])
    concatenator = Concatenate(axis=-1)
    densor1 = Dense(10, activation="tanh")
    densor2 = Dense(1, activation="relu")
    activator = Activation(
        nmt_utils.softmax, name='attention_weights'
    )  # We are using a custom softmax(axis = 1) loaded from nmt_utils
    dotor = Dot(axes=1)

    if save:
        model, _ = myModel(Xoh, Yoh, **params, **hparams)
        print("Saving weights...")
        model.save_weights(os.path.join(save_dir, 'date_model_epoch1.h5'))
        print("Weight Saved!")

    else:
        if load_model is None:
            raise FileNotFoundError(
                'Please provide a valid model path along with the model name!')
        else:
            model = create_model(**params)
            print("Loading pretrained weights...")
            model.load_weights(load_model)
            print(f"{load_model} weights loaded!")

    return model, params, vocab
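Typical usage of this main (the directory below is illustrative):

# Train from scratch and save the weights:
model, params, vocab = main(save=True, save_dir='models')
# Or skip training and load previously saved weights instead:
# model, params, vocab = main(save=False, load_model='models/date_model_epoch15.h5')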
Example #5
from faker import Faker
import random
from tqdm import tqdm
from babel.dates import format_date
from nmt_utils import load_dataset, preprocess_data, string_to_int, to_categorical
from pyt_model import Attn
import torch.optim as optim
import torch
import torch.nn as nn


# We'll train the model on a dataset of 10000 human readable dates
# and their equivalent, standardized, machine readable dates. 
m = 10000
dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m)

# X: a processed version of the human readable dates in the training set,
# where each character is replaced by an index mapped to the character via human_vocab.
# Each date is further padded to $T_x$ values with a special character (<pad>). X.shape = (m, Tx)

# Y: a processed version of the machine readable dates in the training set,
# where each character is replaced by the index it is mapped to in machine_vocab.
# You should have Y.shape = (m, Ty).

# Xoh: one-hot version of X, the "1" entry's index is mapped to the character thanks
# to human_vocab. Xoh.shape = (m, Tx, len(human_vocab))

# Yoh: one-hot version of Y, the "1" entry's index is mapped to the character thanks to machine_vocab.
# Yoh.shape = (m, Ty, len(machine_vocab)).
# Here, len(machine_vocab) = 11 since there are 11 characters ('-' as well as 0-9).
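The shapes described above can be checked directly with a short sketch (assuming Tx = 30 and Ty = 10, as in the earlier examples):

Tx, Ty = 30, 10
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)
print("X:", X.shape)      # (m, Tx)
print("Y:", Y.shape)      # (m, Ty)
print("Xoh:", Xoh.shape)  # (m, Tx, len(human_vocab))
print("Yoh:", Yoh.shape)  # (m, Ty, len(machine_vocab))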