Example no. 1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--input",
                        default=None,
                        type=str,
                        help="run demo chatbot")

    args = parser.parse_args()

    input_sentence = args.input

    tokenizer = tfds.features.text.SubwordTextEncoder.load_from_file(
        vocab_filename)
    # Vocabulary size plus start and end token
    VOCAB_SIZE = tokenizer.vocab_size + 2

    model = Transformer(num_layers=NUM_LAYERS,
                        units=UNITS,
                        d_model=D_MODEL,
                        num_heads=NUM_HEADS,
                        vocab_size=VOCAB_SIZE,
                        dropout=DROPOUT,
                        name='transformer')
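    # A throwaway prediction on a demo sentence, presumably so the model's
    # variables get built before the saved weights are restored below.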
    demo_sentence = 'How are you'
    predict(demo_sentence, tokenizer, model, True)

    model.load_weights(save_weight_path)

    model.summary()
    tf.keras.utils.plot_model(model,
                              to_file='transformer.png',
                              show_shapes=True)

    predict(input_sentence, tokenizer, model)
Example no. 2
def main():
    Place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(Place):
        model = Transformer(image_size=512,
                            num_classes=15,
                            hidden_unit_num=1024,
                            layer_num=2,
                            head_num=16,
                            dropout=0.8,
                            decoder_name='PUP',
                            hyber=True,
                            visualable=False)
        preprocess = Transform(512)
        dataloader_1 = Dataloader('/home/aistudio/dataset',
                                  '/home/aistudio/dataset/val_list.txt',
                                  transform=preprocess,
                                  shuffle=True)
        val_load = fluid.io.DataLoader.from_generator(capacity=1,
                                                      use_multiprocess=False)
        val_load.set_sample_generator(dataloader_1, batch_size=1, places=Place)
        model_dic, optic_dic = load_dygraph(
            "./output/SETR-NotZero-Epoch-2-Loss-0.161517-MIOU-0.325002")
        model.load_dict(model_dic)
        model.eval()
        '''result = get_infer_data("/home/aistudio/dataset/infer")
        infer_load  = Load_infer('/home/aistudio/dataset', result, transform=preprocess, shuffle=False)
        loader_infer= fluid.io.DataLoader.from_generator(capacity=1, use_multiprocess=False)
        loader_infer.set_sample_generator(infer_load, batch_size=1, places=Place)
        process_image(model, loader_infer, result)'''
        validation(val_load, model, 15)
Example no. 3
    def __init__(self):
        super(TMLU, self).__init__()

        self.global_step = 0

        # Data Pipeline
        data_pipeline = Preprocess(cfg)
        self.train_dataset, self.val_dataset = data_pipeline.get_data()

        self.tokenizer_pt = data_pipeline.tokenizer_pt
        self.tokenizer_en = data_pipeline.tokenizer_en

        # Plus two ids reserved for the start and end tokens
        cfg.input_vocab_size = self.tokenizer_pt.vocab_size + 2
        cfg.target_vocab_size = self.tokenizer_en.vocab_size + 2

        # Model
        self.transformer = Transformer(cfg)

        # Optimizer
        learning_rate = CustomSchedule(cfg.d_model)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

        # Loss and Metrics
        self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
        self.train_loss = tf.keras.metrics.Mean(name='train_loss')
        self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

        # Build writers for logging
        self.build_writers()

        checkpoint_path = "./checkpoints/train"
        self.ckpt = tf.train.Checkpoint(transformer=self.transformer, optimizer=self.optimizer)
        self.ckpt_manager = tf.train.CheckpointManager(self.ckpt, checkpoint_path, max_to_keep=5)
Example no. 4
def train(args):
    vocab = data_utils.get_vocab(vocab_file=args.vocab_file, min_freq=args.min_vocab_freq)
    # vocab = {}
    # with open(args.vocab_file, mode='r') as infile:
    #     for line in infile:
    #         w, w_id = line.split('\t')
    #         vocab[w] = int(w_id)

    print('Vocab loaded...')
    print('VOCAB SIZE = ', len(vocab))

    if args.model_type == 'transformer':
        transformer = Transformer(args=args, vocab=vocab)
        transformer.train_generator()
    elif args.model_type == 'rnn':
        rnn_params = {'rec_cell': 'lstm',
                     'encoder_dim': 800,
                     'decoder_dim': 800,
                     'num_encoder_layers': 2,
                     'num_decoder_layers': 2
                     }
        rnn = RNNSeq2Seq(args=args, rnn_params=rnn_params, vocab=vocab)
        # rnn.train()
        rnn.train_keras()
    elif args.model_type == 'han_rnn':
        han_rnn = HanRnnSeq2Seq(args=args, vocab=vocab)
        han_rnn.train()
    elif args.model_type == 'cnn':
        cnn = ConvSeq2Seq(args=args, vocab=vocab)
        cnn.train_keras()

    return
Example no. 5
    def __init__(self, config):
        self.config = config
        self.prepare_dataloaders(config['data'])

        # self.model = MLP(config['MLP'])
        # self.model = MLP_3D(config['MLP'])
        # self.model = LSTM(config['LSTM'])
        self.model = Transformer(config['Trans'])
        print(self.model)

        self.model_name = config['train']['model_name']

        self.checkpoint_dir = './checkpoint_dir/{}/'.format(self.model_name)
        if not os.path.exists(self.checkpoint_dir):
            os.mkdir(self.checkpoint_dir)
        self.tb_log_dir = './tb_log/{}/'.format(self.model_name)
        if not os.path.exists(self.tb_log_dir):
            os.mkdir(self.tb_log_dir)

        self.optimal_metric = 100000
        self.cur_metric = 100000

        self.loss = nn.MSELoss()
        self.optim = optim.Adam(self.model.parameters(),
                                lr=self.config['train']['lr'],
                                betas=(0.5, 0.999))
Example no. 6
def evaluate(args):
    label_map = load_label_map(args.dataset)
    n_classes = 50
    if args.dataset == "include":
        n_classes = 263

    if args.use_cnn:
        dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir,
                                      f"{args.dataset}_test_features"),
            label_map=label_map,
            mode="test",
        )

    else:
        dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir,
                                       f"{args.dataset}_test_keypoints"),
            use_augs=False,
            label_map=label_map,
            mode="test",
            max_frame_len=169,
        )

    dataloader = data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )

    if args.model == "lstm":
        config = LstmConfig()
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = LSTM(config=config, n_classes=n_classes)
    else:
        config = TransformerConfig(size=args.transformer_size)
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = Transformer(config=config, n_classes=n_classes)

    model = model.to(device)

    if args.use_pretrained == "evaluate":
        model, _, _ = load_pretrained(args, n_classes, model)
        print("### Model loaded ###")

    else:
        exp_name = get_experiment_name(args)
        model_path = os.path.join(args.save_path, exp_name) + ".pth"
        ckpt = torch.load(model_path)
        model.load_state_dict(ckpt["model"])
        print("### Model loaded ###")

    test_loss, test_acc = validate(dataloader, model, device)
    print("Evaluation Results:")
    print(f"Loss: {test_loss}, Accuracy: {test_acc}")
Example no. 7
def change_max_pos_embd(args, new_mpe_size, n_classes):
    config = TransformerConfig(size=args.transformer_size,
                               max_position_embeddings=new_mpe_size)
    if args.use_cnn:
        config.input_size = CnnConfig.output_dim
    model = Transformer(config=config, n_classes=n_classes)
    model = model.to(device)
    return model
Example no. 8
def train(args):
    src_root = args.src_root
    sr = args.sample_rate
    dt = args.delta_time
    batch_size = args.batch_size
    model_type = args.model_type
    params = {'N_CLASSES':len(os.listdir(args.src_root)),
              'SR':sr,
              'DT':dt}
    models = {'conv1d':Conv1D(**params),
              'conv2d':Conv2D(**params),
              'lstm':  LSTM(**params),
              'transformer': Transformer(**params),
              'ViT': ViT(**params)}

    assert model_type in models.keys(), '{} not an available model'.format(model_type)
    csv_path = os.path.join('logs', '{}_history.csv'.format(model_type))

    wav_paths = glob('{}/**'.format(src_root), recursive=True)
    wav_paths = [x.replace(os.sep, '/') for x in wav_paths if '.wav' in x]
    classes = sorted(os.listdir(args.src_root))
    le = LabelEncoder()
    le.fit(classes)
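    # Each clip's label is the name of the class directory that contains it.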
    labels = [os.path.split(x)[0].split('/')[-1] for x in wav_paths]
    labels = le.transform(labels)
    wav_train, wav_val, label_train, label_val = train_test_split(wav_paths,
                                                                  labels,
                                                                  test_size=0.1,
                                                                  random_state=0)

    assert len(label_train) >= args.batch_size, 'Number of train samples must be >= batch_size'
    if len(set(label_train)) != params['N_CLASSES']:
        warnings.warn('Found {}/{} classes in training data. Increase data size or change random_state.'.format(len(set(label_train)), params['N_CLASSES']))
    if len(set(label_val)) != params['N_CLASSES']:
        warnings.warn('Found {}/{} classes in validation data. Increase data size or change random_state.'.format(len(set(label_val)), params['N_CLASSES']))

    tg = DataGenerator(wav_train, label_train, sr, dt,
                       params['N_CLASSES'], batch_size=batch_size)
    vg = DataGenerator(wav_val, label_val, sr, dt,
                       params['N_CLASSES'], batch_size=batch_size)
    model = models[model_type]
    model.summary()
    cp = ModelCheckpoint('models/{}.h5'.format(model_type), monitor='val_loss',
                         save_best_only=True, save_weights_only=False,
                         mode='auto', save_freq='epoch', verbose=1)
    csv_logger = CSVLogger(csv_path, append=False)
    model.fit(tg, validation_data=vg,
              epochs=40, verbose=1,
              callbacks=[csv_logger, cp])
Example no. 9
    def __init__(self,
                 voc_size_src,
                 voc_size_tar,
                 max_pe,
                 num_encoders,
                 num_decoders,
                 emb_size,
                 num_head,
                 ff_inner=2048,
                 p_dropout=0.1):
        super(mBART, self).__init__()
        self.transformer = Transformer.Transformer(voc_size_src, voc_size_tar,
                                                   max_pe, num_encoders,
                                                   num_decoders, emb_size,
                                                   num_head, ff_inner,
                                                   p_dropout)
Example no. 10
def create_model_transformer_b2a(arg, devices_list, eval=False):
    from models import Transformer
    resume_dataset = arg.eval_dataset_transformer if eval else arg.dataset
    resume_b = arg.eval_split_source_trasformer if eval else arg.split_source
    resume_a = arg.eval_split_trasformer if eval else arg.split
    resume_epoch = arg.eval_epoch_transformer if eval else arg.resume_epoch

    transformer = Transformer(in_channels=boundary_num,
                              out_channels=boundary_num)

    if resume_epoch > 0:
        load_path = arg.resume_folder + 'transformer_' + resume_dataset + '_' + resume_b + '2' + resume_a + '_' + str(
            resume_epoch) + '.pth'
        print('Loading Transformer from ' + load_path)
        transformer = load_weights(transformer, load_path, devices_list[0])
    else:
        init_weights(transformer, init_type='transformer')
        # init_weights(transformer)

    if arg.cuda:
        transformer = transformer.cuda(device=devices_list[0])

    return transformer
Example no. 11
    def __init__(self, config):
        super(MultiTaskNMT, self).__init__()
        self.config = config

        self.model1 = Transformer(config)
        self.model2 = Transformer(config)

        # Identify the language flags for training the model
        self.lang1 = find_key_from_val(config.id2w, config.lang1)
        self.lang2 = find_key_from_val(config.id2w, config.lang2)

        # Weight sharing between the two models

        # Embedding Layer weight sharing
        self.model1.embed_word.weight = self.model2.embed_word.weight

        # The whole encoder is shared between the two models when
        # decoder-parameter sharing is enabled
        if config.pshare_decoder_param:
            self.model1.encoder = self.model2.encoder

        # Sub-layer weight sharing (Q/K/V, output projection, layer norms,
        # feed-forward) in the transformer decoder
        for i in range(config.layers):
            if config.pshare_decoder_param:
                # Share Decoder Params
                # Share Query
                if 'q' in config.share_sublayer:
                    if "self" in config.attn_share:
                        self.model1.decoder.layers[i].self_attention.W_Q.weight = \
                        self.model2.decoder.layers[i].self_attention.W_Q.weight
                    if "source" in config.attn_share:
                        self.model1.decoder.layers[i].source_attention.W_Q.weight = \
                        self.model2.decoder.layers[i].source_attention.W_Q.weight

                # Share Key
                if 'k' in config.share_sublayer:
                    if "self" in config.attn_share:
                        self.model1.decoder.layers[i].self_attention.W_K.weight = \
                        self.model2.decoder.layers[i].self_attention.W_K.weight
                    if "source" in config.attn_share:
                        self.model1.decoder.layers[i].source_attention.W_K.weight = \
                        self.model2.decoder.layers[i].source_attention.W_K.weight

                # Share Value
                if 'v' in config.share_sublayer:
                    if "self" in config.attn_share:
                        self.model1.decoder.layers[i].self_attention.W_V.weight = \
                        self.model2.decoder.layers[i].self_attention.W_V.weight
                    if "source" in config.attn_share:
                        self.model1.decoder.layers[i].source_attention.W_V.weight = \
                        self.model2.decoder.layers[i].source_attention.W_V.weight

                # Share last Finishing Linear Layer
                if 'f' in config.share_sublayer:
                    if "self" in config.attn_share:
                        self.model1.decoder.layers[i].self_attention.finishing_linear_layer.weight = \
                        self.model2.decoder.layers[i].self_attention.finishing_linear_layer.weight
                    if "source" in config.attn_share:
                        self.model1.decoder.layers[i].source_attention.finishing_linear_layer.weight = \
                        self.model2.decoder.layers[i].source_attention.finishing_linear_layer.weight

                # Share LayerNorm
                self.model1.decoder.layers[
                    i].ln_1 = self.model2.decoder.layers[i].ln_1
                self.model1.decoder.layers[
                    i].ln_2 = self.model2.decoder.layers[i].ln_2

                # Share the linear layers
                if 'linear' in config.share_sublayer:
                    self.model1.decoder.layers[i].feed_forward = \
                        self.model2.decoder.layers[i].feed_forward
                    self.model1.decoder.layers[i].ln_3 = \
                        self.model2.decoder.layers[i].ln_3
Example no. 12
def main():
    questions, answers = load_conversations()
    # Build tokenizer using tfds for both questions and answers
    tokenizer = tfds.features.text.SubwordTextEncoder.build_from_corpus(
        questions + answers, target_vocab_size=2**13)

    tokenizer.save_to_file(vocab_filename)

    # Vocabulary size plus start and end token
    VOCAB_SIZE = tokenizer.vocab_size + 2

    questions, answers = tokenize_and_filter(questions, answers, tokenizer)
    print('Vocab size: {}'.format(VOCAB_SIZE))
    print('Number of samples: {}'.format(len(questions)))
    # decoder inputs use the previous target as input
    # remove START_TOKEN from targets
    dataset = tf.data.Dataset.from_tensor_slices((
        {
            'inputs': questions
        },
        {
            'outputs': answers
        },
    ))

    dataset = dataset.cache()
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    print(dataset)

    model = Transformer(num_layers=NUM_LAYERS,
                        units=UNITS,
                        d_model=D_MODEL,
                        num_heads=NUM_HEADS,
                        vocab_size=VOCAB_SIZE,
                        dropout=DROPOUT,
                        name='transformer')

    learning_rate = CustomSchedule(D_MODEL)

    optimizer = tf.keras.optimizers.Adam(learning_rate,
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)

    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              checkpoint_path,
                                              max_to_keep=5)

    # if a checkpoint exists, restore the latest checkpoint.
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    for epoch in range(EPOCHS):
        start = time.time()

        train_loss.reset_states()
        train_accuracy.reset_states()

        for (batch, (inp, tar)) in enumerate(dataset):

            train_step(inp, tar, model, optimizer, train_loss, train_accuracy)

            if batch % 500 == 0:
                print('Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}'.format(
                    epoch + 1, batch, train_loss.result(),
                    train_accuracy.result()))

        if (epoch + 1) % 5 == 0:
            ckpt_save_path = ckpt_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(
                epoch + 1, ckpt_save_path))

        print('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(
            epoch + 1, train_loss.result(), train_accuracy.result()))

        print('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

    model.save_weights(save_weight_path)
    #model.summary()
    input_sentence = 'Where have you been?'
    predict(input_sentence, tokenizer, model)

    sentence = 'I am not crazy, my mother had me tested.'
    for _ in range(5):
        sentence = predict(sentence, tokenizer, model)
        print('')
Example no. 13
    keypoints_dir=save_dir,
    max_frame_len=169,
)

dataloader = data.DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)
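# Invert the label map so predicted ids can be mapped back to class names.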
label_map = dict(zip(label_map.values(), label_map.keys()))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
config = TransformerConfig(size="large", max_position_embeddings=256)
model = Transformer(config=config, n_classes=263)
model = model.to(device)

pretrained_model_name = "include_no_cnn_transformer_large.pth"
pretrained_model_links = load_json("pretrained_links.json")
if not os.path.isfile(pretrained_model_name):
    link = pretrained_model_links[pretrained_model_name]
    torch.hub.download_url_to_file(link, pretrained_model_name, progress=True)

ckpt = torch.load(pretrained_model_name)
model.load_state_dict(ckpt["model"])
print("### Model loaded ###")

preds = inference(dataloader, model, device, label_map)
print(json.dumps(preds, indent=2))
Example no. 14
    no_up = 0
    # Define the computation graph
    print('---start graph---')
    with tf.Graph().as_default():

        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)
        session_conf.gpu_options.allow_growth = True
        session_conf.gpu_options.per_process_gpu_memory_fraction = 0.9  # configure the GPU memory usage fraction

        sess = tf.Session(config=session_conf)

        # Define the session
        with sess.as_default():

            transformer = Transformer(config, wordEmbedding)

            globalStep = tf.Variable(0, name="globalStep", trainable=False)
            # Define the optimizer, passing in the learning rate
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=config.training.learningRate, momentum=0.1)  #
            #optimizer = tf.train.AdamOptimizer(config.training.learningRate,beta1=0.9,beta2=0.999,epsilon=1e-08,)
            #optimizer = tf.keras.optimizers.SGD(learning_rate=config.training.learningRate, momentum=0.1,nesterov=False)
            #optimizer = tf.train.GradientDescentOptimizer(learning_rate=config.training.learningRate)
            #adadelta
            #optimizer = tf.train.RMSPropOptimizer(config.training.learningRate, decay=0.9, momentum=0.1, epsilon=1e-10,)

            gradsAndVars = optimizer.compute_gradients(transformer.loss)
            '''
            mean_grad = tf.zeros(())
            for grad, var in gradsAndVars:
Example no. 15
    word2id_fr = pickle.load(f)
with open('./data/europarl/word2id_en.pickle', 'rb') as f:
    word2id_en = pickle.load(f)
with open('./data/europarl/input_sentences.pickle', 'rb') as f:
    input_sentences = pickle.load(f)
with open('./data/europarl/output_sentences.pickle', 'rb') as f:
    output_sentences = pickle.load(f)

n = input_sentences.shape[0]
n_train = int(0.9 * n)
perm = np.random.permutation(n)
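# First 90% of the shuffled indices form the training set; the rest are validation.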
train_in = input_sentences[perm[0:n_train]]
train_out = output_sentences[perm[0:n_train]]
val_in = input_sentences[perm[n_train:n]].values
val_out = output_sentences[perm[n_train:n]].values

model = Transformer(in_voc=(id2word_fr, word2id_fr),
                    out_voc=(id2word_en, word2id_en),
                    hidden_size=50,
                    lr=1e-3,
                    batch_size=128,
                    beam_size=10,
                    nb_epochs=10,
                    nb_heads=4,
                    pos_enc=True,
                    nb_layers=1)

model.fit(train_in, train_out)
model.save("./model/transformer.ckpt")
# model.evaluate(valid_data=(val_in, val_out)) # requires nltk
Example no. 16
    # model parameters
    dim_input = 6
    output_sequence_length = Y_DAYS
    dec_seq_len = Y_DAYS
    dim_val = 64
    dim_attn = 12#12
    n_heads = 8 
    n_encoder_layers = 4
    n_decoder_layers = 2

    # paths
    PATHS = crypto_data_paths()
    MODEL_PATH = 'weights/trans/stock/3days/{e}_{d}_{v}_{y}_seed{seed}'.format(e=n_encoder_layers, d=n_decoder_layers, v=dim_val, y=Y_DAYS, seed=SEED)

    #init network
    net = Transformer(dim_val, dim_attn, dim_input, dec_seq_len, output_sequence_length, n_decoder_layers, n_encoder_layers, n_heads)
    #net.load_state_dict(torch.load(MODEL_PATH))
    # load the dataset
    X_train, y_train, X_test, y_test = create_input_data(PATHS, N_LAGS, Y_DAYS)
    train_dataset = StockDataset(X_train, y_train)
    train_loader = DataLoader(dataset=train_dataset,     
                            batch_size=BATCH_SIZE)
    test_dataset = StockDataset(X_test, y_test)
    test_loader = DataLoader(dataset=test_dataset,     
                            batch_size=BATCH_SIZE)

    train(net, N_EPOCHS, train_loader, LR, MODEL_PATH)
    eval(net, MODEL_PATH, test_loader)
    #tensorboard --logdir=runs

    #The MSE is  0.0010176260894923762 0
Example no. 17
                        help="Clipped gradient norm")
    parser.add_argument("--model_no", type=int, default=0, help="Model ID")
    parser.add_argument("--num_epochs",
                        type=int,
                        default=350,
                        help="No of epochs")
    args = parser.parse_args()

    train_iter, FR, EN, train_length = load_dataloaders(args)
    src_vocab = len(EN.vocab)
    trg_vocab = len(FR.vocab)

    cuda = torch.cuda.is_available()
    net = Transformer(src_vocab=src_vocab,
                      trg_vocab=trg_vocab,
                      d_model=args.d_model,
                      num=args.num,
                      n_heads=args.n_heads)
    for p in net.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    criterion = nn.CrossEntropyLoss(reduction="mean", ignore_index=1)
    optimizer = optim.Adam(net.parameters(),
                           lr=args.lr,
                           betas=(0.9, 0.98),
                           eps=1e-9)
    #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10,20,30,40,50,100,200], gamma=0.7)
    scheduler = CosineWithRestarts(optimizer, T_max=500)
    if cuda:
        net.cuda()
    start_epoch, acc = load_state(net,
Example no. 18
from pydub import AudioSegment
from models import Transformer
from utils import tokenizer, parse_wav, generate_input
import tensorflow as tf
import numpy as np

transformer = Transformer(2,
                          512,
                          8,
                          2048,
                          4337,
                          pe_input=4096,
                          pe_target=512,
                          rate=0.1,
                          training=False)
checkpoint_path = "results/asr-transfer/2/checkpoint-{:02d}".format(37)
transformer.load_weights(checkpoint_path)


def pre_process(wav_file):
    log_bank = parse_wav(wav_file)
    input, mask, target = generate_input(log_bank)
    return input, mask, target


def post_process(predicted_ids):
    y_predict = u"".join([tokenizer.id2token(id) for id in predicted_ids])
    return y_predict


def trans_mp3_to_wav(filepath):
Example no. 19
                        batch_size_fn=utils.batch_size_fn,
                        train=True)
val_iter = MyIterator(val,
                      batch_size=ARGS.batch_size,
                      device=ARGS.device,
                      repeat=False,
                      sort_key=lambda x: (len(x.src), len(x.trg)),
                      batch_size_fn=utils.batch_size_fn,
                      train=False)
print('Done get dataset')

# model
model = Transformer(len(SRC.vocab),
                    len(TGT.vocab),
                    N=ARGS.n_layers,
                    d_model=ARGS.d_model,
                    d_ff=4 * ARGS.d_model,
                    h=ARGS.n_heads,
                    dropout=ARGS.p_dropout).to(ARGS.device)
criterion = LabelSmoothing(size=len(TGT.vocab),
                           padding_idx=pad_idx,
                           smoothing=0.1).to(ARGS.device)

# train
if ARGS.run_mode == 'train':
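    # NoamOpt presumably wraps Adam with the Transformer warmup-then-decay
    # learning-rate schedule (factor 1, 2000 warmup steps here).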
    optimizer = NoamOpt(
        ARGS.d_model, 1, 2000,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98),
                         eps=1e-9))
    iter_cnt = 1
    min_norm_val_loss = math.inf
Example no. 20
def main(argv):
    # Creating dataloaders for training and validation
    logging.info("Creating the source dataloader from: %s" % FLAGS.source)
    logging.info("Creating the target dataloader from: %s" % FLAGS.target)
    train_dataset, valid_dataset, src_tokenizer, \
    tar_tokenizer, size_train, size_val = prepare_training_pairs(FLAGS.source,
                                                                 FLAGS.target,
                                                                 FLAGS.syn_src,
                                                                 FLAGS.syn_tar,
                                                                 batch_size=FLAGS.batch_size,
                                                                 valid_ratio=0.1,
                                                                 name="ENFR")

    # calculate vocabulary size
    src_vocsize = len(src_tokenizer.word_index) + 1
    tar_vocsize = len(tar_tokenizer.word_index) + 1
    # ----------------------------------------------------------------------------------
    # Creating the instance of the model specified.
    logging.info("Create Transformer Model")
    optimizer = tf.keras.optimizers.Adam()
    model = Transformer.Transformer(voc_size_src=src_vocsize,
                                    voc_size_tar=tar_vocsize,
                                    max_pe=10000,
                                    num_encoders=FLAGS.num_enc,
                                    num_decoders=FLAGS.num_dec,
                                    emb_size=FLAGS.emb_size,
                                    num_head=FLAGS.num_head,
                                    ff_inner=FLAGS.ffnn_dim)

    # load pretrained mBart
    if FLAGS.load_mBart:
        print("Load Pretraining mBART...")
        mbart_ckpt_dir = FLAGS.mbartckpt
        latest = tf.train.latest_checkpoint(mbart_ckpt_dir)

        checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
        status = checkpoint.restore(tf.train.latest_checkpoint(mbart_ckpt_dir))
        status.assert_existing_objects_matched()

    # ----------------------------------------------------------------------------------
    # Choose the Optimizor, Loss Function, and Metrics
    # create custom learning rate schedule
    class transformer_lr_schedule(
            tf.keras.optimizers.schedules.LearningRateSchedule):
        def __init__(self, emb_size, warmup_steps=4000):
            super(transformer_lr_schedule, self).__init__()
            self.emb_size = tf.cast(emb_size, tf.float32)
            self.warmup_steps = warmup_steps

        def __call__(self, step):
            lr_option1 = tf.math.rsqrt(step)
            lr_option2 = step * (self.warmup_steps**-1.5)
            return tf.math.rsqrt(self.emb_size) * tf.math.minimum(
                lr_option1, lr_option2)

    learning_rate = transformer_lr_schedule(FLAGS.emb_size)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)

    # Todo: figure out why SparseCategoricalCrossentropy
    criterion = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,
                                                              reduction='none')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    def loss_fn(label, pred):
        """
        The criterion above calculates the loss at every position; the loss at
        positions that are padding in the label (label == 0) must be masked out.
        """
        mask = tf.math.logical_not(tf.math.equal(label, 0))
        loss = criterion(label, pred)

        # convert the mask from Bool to float
        mask = tf.cast(mask, dtype=loss.dtype)
        loss *= mask

        return tf.reduce_sum(loss) / tf.reduce_sum(mask)

    # ----------------------------------------------------------------------------------
    # train/valid function
    # Todo: need to understand this
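    # The fixed input_signature keeps tf.function from retracing on every new batch shape.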
    train_step_signature = [
        tf.TensorSpec(shape=[None, None], dtype=tf.int32),
        tf.TensorSpec(shape=[None, None], dtype=tf.int32)
    ]

    @tf.function(input_signature=train_step_signature)
    def train_step(inp, targ):
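        # Teacher forcing: the decoder input drops the last token, the target drops the first.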
        tar_inp = targ[:, :-1]
        tar_real = targ[:, 1:]
        end = tf.cast(
            tf.math.logical_not(
                tf.math.equal(tar_inp, tar_tokenizer.word_index['<end>'])),
            tf.int32)
        tar_inp *= end
        # tf.print("tar inp", tar_inp)
        # tf.print("tar real", tar_real)
        # create mask
        enc_padding_mask = Transformer.create_padding_mask(inp)

        # mask for first attention block in decoder
        look_ahead_mask = Transformer.create_seq_mask(tf.shape(tar_inp)[1])
        dec_target_padding_mask = Transformer.create_padding_mask(tar_inp)
        combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)

        # mask for "enc_dec" multihead attention
        dec_padding_mask = Transformer.create_padding_mask(inp)

        with tf.GradientTape() as tape:
            # feed input into encoder
            predictions = model(inp, tar_inp, True, enc_padding_mask,
                                combined_mask, dec_padding_mask)
            train_loss = loss_fn(tar_real, predictions)

            # optimize step
            gradients = tape.gradient(train_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients,
                                          model.trainable_variables))
        return train_loss

    @tf.function(input_signature=train_step_signature)
    def valid_step(inp, targ):
        tar_inp = targ[:, :-1]
        tar_real = targ[:, 1:]
        end = tf.cast(
            tf.math.logical_not(
                tf.math.equal(tar_inp, tar_tokenizer.word_index['<end>'])),
            tf.int32)
        tar_inp *= end
        # create mask
        enc_padding_mask = Transformer.create_padding_mask(inp)

        # mask for first attention block in decoder
        look_ahead_mask = Transformer.create_seq_mask(tf.shape(tar_inp)[1])
        dec_target_padding_mask = Transformer.create_padding_mask(tar_inp)
        combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)

        # mask for "enc_dec" multihead attention
        dec_padding_mask = Transformer.create_padding_mask(inp)

        # feed input into encoder
        predictions = model(inp, tar_inp, False, enc_padding_mask,
                            combined_mask, dec_padding_mask)
        val_loss = loss_fn(tar_real, predictions)
        train_accuracy(tar_real, predictions)
        return val_loss

    # ----------------------------------------------------------------------------------
    # Set up checkpoints so training can resume if something interrupts, and save results
    ckpt_prefix = os.path.join(FLAGS.ckpt, "ckpt_BT_ENFR_transformer")
    ckpt = tf.train.Checkpoint(optimizer=optimizer, model=model)
    manager = tf.train.CheckpointManager(ckpt,
                                         directory=FLAGS.ckpt,
                                         max_to_keep=2)
    # restore from latest checkpoint and iteration
    if not FLAGS.load_mBart:
        print("Load previous checkpoints...")
        status = ckpt.restore(manager.latest_checkpoint)
        if manager.latest_checkpoint:
            logging.info("Restored from {}".format(manager.latest_checkpoint))
            status.assert_existing_objects_matched()
        else:
            logging.info("Initializing from scratch.")

    # ----------------------------------------------------------------------------------
    # Setup the TensorBoard for better visualization
    logging.info("Setup the TensorBoard...")
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_log_dir = './logs/gradient_tape/' + current_time + '/BT_ENFR_transformer_train'
    test_log_dir = './logs/gradient_tape/' + current_time + '/BT_ENFR_transformer_test'
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    # ----------------------------------------------------------------------------------
    # Start Training Process
    EPOCHS = FLAGS.epochs

    for epoch in range(EPOCHS):
        start = time.time()
        total_train_loss = 0.
        total_val_loss = 0.

        # train
        for (inp, targ) in train_dataset:
            train_loss = train_step(inp, targ)
            total_train_loss += train_loss

        # save checkpoint
        if (epoch + 1) % 5 == 0:
            ckpt.save(file_prefix=ckpt_prefix)

        # validation
        for (inp, tar) in valid_dataset:
            val_loss = valid_step(inp, tar)
            total_val_loss += val_loss

        # average loss
        total_train_loss /= (size_train / FLAGS.batch_size)
        total_val_loss /= (size_val / FLAGS.batch_size)

        # Write loss to Tensorborad
        with train_summary_writer.as_default():
            tf.summary.scalar('Train loss', total_train_loss, step=epoch)

        with test_summary_writer.as_default():
            tf.summary.scalar('Valid loss', total_val_loss, step=epoch)

        logging.info(
            'Epoch {} Train Loss {:.4f} Valid loss {:.4f} Valid Accuracy {:.4f}'
            .format(epoch + 1, total_train_loss, total_val_loss,
                    train_accuracy.result()))

        logging.info(
            'Time taken for 1 train_step {} sec\n'.format(time.time() - start))
Example no. 21
    maxlen = int(sys.argv[1])
else:
    maxlen = 16

args = subword_batches(zip(in_texts, tar_texts), maxlen)
args = subword_batches(zip(in_valid, tar_valid), maxlen, args)
dg = args[params.train_generator]
vdg = args[params.valid_generator]
input_vocab_size = args[params.input_vocab_size]
target_vocab_size = args[params.target_vocab_size]

transformer = Transformer(num_layers,
                          d_model,
                          num_heads,
                          dff,
                          input_vocab_size,
                          target_vocab_size,
                          pe_input=input_vocab_size,
                          pe_target=target_vocab_size,
                          target_len=args[params.valid_seq_len],
                          rate=dropout_rate)

transformer.compile(optimizer=optimizer,
                    loss=loss_function,
                    metrics=[accuracy_function, bleu_score])
history = transformer.fit(dg, epochs=params.epochs, validation_data=vdg)

transformer.save_weights(weights_dir + '/w' + str(maxlen) + '_ex' +
                         str(params.train_size))
json.dump(
    history.history,
    open(history_dir + '/h' + str(maxlen) + '_ex' + str(params.train_size),
Example no. 22
# embeddings are attributes
if args.dataset_name == 'aids':
    EMBED_DIM = 4
    num_classes = 2
    num_heads = 8
    depth = 6
    p, q = 1, 1
elif args.dataset_name == 'coildel':
    EMBED_DIM = 2
    num_classes = 100
    num_heads = 8
    depth = 6
    p, q = 1, 1

# k, num_heads, depth, seq_length, num_tokens, num_
model = Transformer(EMBED_DIM, num_heads, test_dataset.walklength, depth,
                    num_classes).to(device)

lr_warmup = 10000

lr = 1e-3

opt = torch.optim.Adam(lr=lr, params=model.parameters())
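# Linear warmup: the LR multiplier ramps from 0 to 1 over roughly lr_warmup / batch_size steps.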
sch = torch.optim.lr_scheduler.LambdaLR(
    opt, lambda i: min(i / (lr_warmup / args.batch_size), 1.0))
loss_func = nn.NLLLoss()


def train_validate(model, loader, opt, loss_func, train, device):

    if train:
        model.train()
Example no. 23
def fit(args):
    exp_name = get_experiment_name(args)
    logging_path = os.path.join(args.save_path, exp_name) + ".log"
    logging.basicConfig(filename=logging_path,
                        level=logging.INFO,
                        format="%(message)s")
    seed_everything(args.seed)
    label_map = load_label_map(args.dataset)

    if args.use_cnn:
        train_dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir,
                                      f"{args.dataset}_train_features"),
            label_map=label_map,
            mode="train",
        )
        val_dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir,
                                      f"{args.dataset}_val_features"),
            label_map=label_map,
            mode="val",
        )

    else:
        train_dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir,
                                       f"{args.dataset}_train_keypoints"),
            use_augs=args.use_augs,
            label_map=label_map,
            mode="train",
            max_frame_len=169,
        )
        val_dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir,
                                       f"{args.dataset}_val_keypoints"),
            use_augs=False,
            label_map=label_map,
            mode="val",
            max_frame_len=169,
        )

    train_dataloader = data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
    )
    val_dataloader = data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )

    n_classes = 50
    if args.dataset == "include":
        n_classes = 263

    if args.model == "lstm":
        config = LstmConfig()
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = LSTM(config=config, n_classes=n_classes)
    else:
        config = TransformerConfig(size=args.transformer_size)
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = Transformer(config=config, n_classes=n_classes)

    model = model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(),
                                  lr=args.learning_rate,
                                  weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.2)
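    # scheduler.step(val_acc) below reduces the LR when validation accuracy plateaus (hence mode="max").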

    if args.use_pretrained == "resume_training":
        model, optimizer, scheduler = load_pretrained(args, n_classes, model,
                                                      optimizer, scheduler)

    model_path = os.path.join(args.save_path, exp_name) + ".pth"
    es = EarlyStopping(patience=15, mode="max")
    for epoch in range(args.epochs):
        print(f"Epoch: {epoch+1}/{args.epochs}")
        train_loss, train_acc = train(train_dataloader, model, optimizer,
                                      device)
        val_loss, val_acc = validate(val_dataloader, model, device)
        logging.info(
            "Epoch: {}, train loss: {}, train acc: {}, val loss: {}, val acc: {}"
            .format(epoch + 1, train_loss, train_acc, val_loss, val_acc))
        scheduler.step(val_acc)
        es(
            model_path=model_path,
            epoch_score=val_acc,
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
        )
        if es.early_stop:
            print("Early stopping")
            break

    print("### Training Complete ###")
Example no. 24
            steps_per_epoch = train_data_size // batch_size
            warmup_steps = int(epochs * train_data_size * 0.1 / batch_size)
            eval_steps = dev_data_size // batch_size * 10

            # optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

            optimizer = get_optimizer(learning_rate, steps_per_epoch, epochs, warmup_steps)

            callbacks = get_callbacks(model_dir)

            transformer = Transformer(num_layers,
                                      d_model,
                                      num_heads,
                                      dff,
                                      target_vocab_size,
                                      pe_input=pe_input,
                                      pe_target=pe_target,
                                      rate=dropout_rate,
                                      training=train)

            transformer.compile(optimizer=optimizer, loss=loss_function, metrics=[metric_fn()])

            checkpoint_path = "results/asr-transfer/2/checkpoint-{:02d}".format(37)
            transformer.load_weights(checkpoint_path)

            transformer.fit(
                train_data,
                # validation_data=dev_data,
                steps_per_epoch=steps_per_epoch,
                epochs=epochs,
Example no. 25
def main():
    device = torch.device("cpu" if hparams.no_cuda else "cuda")

    print("=== build model ===")
    start = time.time()
    model = Transformer(hparams.d_model, hparams.d_ff, vocab_size,
                        hparams.num_heads, hparams.num_layers, hparams.max_len,
                        hparams.dropout, EOS_id, PAD_id, device).to(device)
    end = time.time()
    print("=== build model done === {} seconds".format(end - start))

    train.global_step = 0

    #     train_dataset, val_dataset = split_data(train_path_en, train_path_de, hparams.validation_rate)
    train_dataset = make_dataset(train_path_en, train_path_de)
    val_dataset = make_dataset(val_path_en, val_path_de)

    train_loader = DataLoader(train_dataset,
                              batch_size=hparams.batch_size,
                              collate_fn=custom_collate,
                              shuffle=True,
                              num_workers=hparams.num_workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=hparams.batch_size,
                            collate_fn=custom_collate,
                            num_workers=hparams.num_workers)

    criterion = torch.nn.NLLLoss(ignore_index=PAD_id,
                                 reduction="sum").to(device)
    optimizer = torch.optim.Adam(model.parameters(), hparams.lr)
    writer = SummaryWriter()

    for epoch in range(hparams.max_epochs):
        """train"""
        print("=== train start ===")
        start = time.time()

        loss, bleu_score = train(model, train_loader, criterion, optimizer,
                                 device, writer, epoch, hparams.print_steps)

        end = time.time()
        print("=== train done === {} seconds".format(end - start))
        print("epoch: {}/{}, loss: {}, bleu score: {}".format(
            epoch + 1, hparams.max_epochs, loss, bleu_score))

        torch.save(model.state_dict(), save_path)
        print("model saved to '{}'".format(os.path.abspath(save_path)))

        writer.add_scalar("Loss/train", loss, epoch + 1)
        writer.add_scalar("Bleu score/train", bleu_score, epoch + 1)
        """"""

        print("=== evaluation start ===")
        start = time.time()

        loss, bleu_score = evaluate(model, val_loader, criterion, optimizer,
                                    device, writer)

        end = time.time()
        print("=== evaluation done === {} seconds".format(end - start))
        print("epoch: {}/{}, loss: {}, bleu score: {}".format(
            epoch + 1, hparams.max_epochs, loss, bleu_score))

        writer.add_scalar("Loss/eval", loss, epoch + 1)
        writer.add_scalar("Bleu score/eval", bleu_score, epoch + 1)
Example no. 26
    #print(df[df["T (degC)"] < 0]["T (degC)"])
    df = df.drop(["max. wv (m/s)", "wv (m/s)"], axis=1)

    day = 24 * 60 * 60
    year = (365.2425) * day

    timestamp_s = date_time.map(datetime.datetime.timestamp)
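    # Encode time of day and time of year as sine/cosine pairs so their periodicity is visible to the model.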
    df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
    df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
    df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
    df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))

    print(df.head(20))

    # Create an instance of the model
    model = Transformer()
    #model = LSTMM()
    loss_object = tf.keras.losses.BinaryCrossentropy()
    optimizer = tf.keras.optimizers.Adam(0.01)
    #
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.BinaryAccuracy()

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.BinaryAccuracy()

    @tf.function
    def train_step(seq, labels):
        with tf.GradientTape() as tape:
            predictions = model(seq)
            tf.print(predictions[0])
Example no. 27
def generate_predictions(ckpt, path_src, path_tar, input_file_path: str,
                         pred_file_path: str, num_sync):
    """Generates predictions for the machine translation task (EN->FR).
    You are allowed to modify this function as needed, but once again, you cannot
    modify any other part of this file. We will be importing only this function
    in our final evaluation script. Since you will most definitely need to import
    modules for your code, you must import these inside the function itself.
    Args:
        input_file_path: the file path that contains the input data.
        pred_file_path: the file path where to store the predictions.
    Returns: None
    """
    # load input file => create test dataloader => (spm encode)
    from data.dataloaders import prepare_test
    BATCH_SIZE = 128
    TOTAL_ITER = int((num_sync / 128))

    # load  tokenizer of train to tokenize test data
    import pickle
    f_src = open(path_src, 'rb')
    f_tar = open(path_tar, 'rb')
    src_tokenizer = pickle.load(f_src)
    tar_tokenizer = pickle.load(f_tar)

    test_dataset, test_max_length = prepare_test(input_file_path,
                                                 src_tokenizer,
                                                 batch_size=BATCH_SIZE)
    # create model
    from models import Transformer
    import tensorflow as tf
    src_vocsize = len(src_tokenizer.word_index) + 1
    tar_vocsize = len(tar_tokenizer.word_index) + 1
    # create model instance
    optimizer = tf.keras.optimizers.Adam()
    model = Transformer.Transformer(voc_size_src=src_vocsize,
                                    voc_size_tar=tar_vocsize,
                                    max_pe=10000,
                                    num_encoders=4,
                                    num_decoders=4,
                                    emb_size=512,
                                    num_head=8,
                                    ff_inner=1024)

    # Load CheckPoint
    ckpt_dir = ckpt
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    status = checkpoint.restore(tf.train.latest_checkpoint(ckpt_dir))
    status.assert_existing_objects_matched()

    # Greedy Search / Beam Search and write to pred_file_path
    import time
    from translate import translate_batch
    start = time.time()
    count = 0
    with open(pred_file_path, 'w', encoding='utf-8') as pred_file:
        for (batch, (inp)) in enumerate(test_dataset):
            print("Evaluating Batch: %s" % batch)
            batch_size = tf.shape(inp)[0].numpy()
            translation = translate_batch(model, inp, batch_size,
                                          tar_tokenizer)
            for sentence in translation:
                pred_file.write(sentence.strip() + '\n')
                pred_file.flush()
            count += 1
            if count > TOTAL_ITER:
                break
    end = time.time()
    print("Translation finish in %s s" % (end - start))
Example no. 28
print(src_tokenizer.index_word)

src_vocsize = len(src_tokenizer.word_index) + 1
tar_vocsize = len(tar_tokenizer.word_index) + 1
print("Source Language voc size: %s" % src_vocsize)
print("Target Language voc size: %s" % tar_vocsize)

print("Source Language max length: %s" % source_max_length)
print("Target Language max length: %s" % target_max_length)

model = Transformer.Transformer(voc_size_src=src_vocsize,
                                voc_size_tar=tar_vocsize,
                                src_max_length=source_max_length,
                                tar_max_length=target_max_length,
                                num_encoders=1,
                                num_decoders=1,
                                emb_size=8,
                                num_head=2,
                                ff_inner=1024)

tf.random.set_seed(12)
for src, tar in train_dataset:
    # create mask
    enc_padding_mask = Transformer.create_padding_mask(src)
    print("enc_padding_mask", enc_padding_mask.numpy())

    # mask for first attention block in decoder
    look_ahead_mask = Transformer.create_seq_mask(target_max_length)
    print("look ahead mask", look_ahead_mask.numpy())
    dec_target_padding_mask = Transformer.create_padding_mask(tar)
Example no. 29
plt.ylabel("Learning Rate")
plt.xlabel("Train Step")


# Loss and Metrics
# -------------------------------------------------------------------------------------------------------
print("Loss and Metrics\n------------------------------------------")
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')


# Training and Checkpoints
# -------------------------------------------------------------------------------------------------------
print("Training and Checkpoints\n------------------------------------------")
transformer = Transformer(num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, dropout_rate)

checkpoint_path = "./checkpoints/train"

ckpt = tf.train.Checkpoint(transformer=transformer, optimizer=optimizer)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

# if a checkpoint exists, restore the latest checkpoint.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print ('Latest checkpoint restored!!')

for epoch in range(EPOCHS):
    start = time.time()
    
Example no. 30
def generate_predictions(input_file_path: str, pred_file_path: str):
    """Generates predictions for the machine translation task (EN->FR).
    You are allowed to modify this function as needed, but once again, you cannot
    modify any other part of this file. We will be importing only this function
    in our final evaluation script. Since you will most definitely need to import
    modules for your code, you must import these inside the function itself.
    Args:
        input_file_path: the file path that contains the input data.
        pred_file_path: the file path where to store the predictions.
    Returns: None
    """
    # ---------------------------------------------------------------------
    # Include essential module for evaluation
    import os
    import json
    import pickle
    import time
    import tensorflow as tf
    from data.dataloaders import prepare_training_pairs, prepare_test
    from models import Transformer
    from translate import translate_batch
    from definition import ROOT_DIR
    CONFIG = "eval_cfg.json"
    # ---------------------------------------------------------------------
    # Load setting in json file

    with open(os.path.join(ROOT_DIR, CONFIG)) as f:
        para = json.load(f)
    batch_size = para["batch_size"]
    source = para["src"]
    target = para["tar"]
    ckpt_dir = para["ckpt"]

    # ---------------------------------------------------------------------
    # Create test dataloader from input file (tokenized and map to sequence)

    # Todo: The final source and target tokenizers from training are needed, so the same tokenizers are used on the test data,
    #  because we didn't build a dictionary file.
    f_src = open(source, 'rb')
    f_tar = open(target, 'rb')
    src_tokenizer = pickle.load(f_src)
    tar_tokenizer = pickle.load(f_tar)

    test_dataset, test_max_length = prepare_test(input_file_path,
                                                 src_tokenizer,
                                                 batch_size=batch_size)
    # calculate vocabulary size
    src_vocsize = len(src_tokenizer.word_index) + 1
    tar_vocsize = len(tar_tokenizer.word_index) + 1
    # ---------------------------------------------------------------------
    # Create the instance of model to load checkpoints
    # Todo: Define the model that fits the checkpoints you want to load
    optimizer = tf.keras.optimizers.Adam()
    model = Transformer.Transformer(voc_size_src=src_vocsize,
                                    voc_size_tar=tar_vocsize,
                                    max_pe=10000,
                                    num_encoders=4,
                                    num_decoders=4,
                                    emb_size=512,
                                    num_head=8,
                                    ff_inner=1024)

    # ---------------------------------------------------------------------
    # Load CheckPoint
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    status = checkpoint.restore(tf.train.latest_checkpoint(ckpt_dir))
    # check if loading is successful
    status.assert_existing_objects_matched()

    # ---------------------------------------------------------------------
    # Use Greedy Search to generate prediction and write to pred_file_path
    start = time.time()
    with open(pred_file_path, 'w', encoding='utf-8') as pred_file:
        for (batch, (inp)) in enumerate(test_dataset):
            if batch % 5 == 0:
                print("Evaluating Batch: %s" % batch)
            batch_size = tf.shape(inp)[0].numpy()
            translation = translate_batch(model, inp, batch_size,
                                          tar_tokenizer)
            for sentence in translation:
                pred_file.write(sentence.strip() + '\n')
    end = time.time()
    print("Translation finish in %s s" % (end - start))