Code example #1
    def build(self):
        inputs = tf.keras.layers.Input(name='char_ids',
                                       shape=[None],
                                       dtype=tf.int32)
        x = TransformerModel(vocab_size=self.config["model"]["vocab_size"],
                             **self.config["model"]["transformer"])(inputs)
        outputs = tf.keras.layers.Dense(self.config["model"]["vocab_size"],
                                        activation=tf.nn.softmax)(x)

        self.model = tf.keras.Model(inputs=inputs, outputs=outputs)

        if self.config["train"]["noam_scheme"]:
            # https://www.tensorflow.org/tutorials/text/transformer#optimizer
            d_model = self.config["model"]["transformer"][
                "num_heads"] * self.config["model"]["transformer"]["head_dim"]
            warmup_steps = self.config["train"]["warmup_steps"]
            learning_rate = CustomSchedule(d_model=d_model,
                                           warmup_steps=warmup_steps)
            optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                                 beta_1=0.9,
                                                 beta_2=0.98,
                                                 epsilon=1e-9)
        else:
            optimizer = tf.keras.optimizers.Adam()

        self.model.compile(loss=ce_masked_loss,
                           optimizer=optimizer,
                           metrics=['accuracy'])
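
Every example on this page calls `CustomSchedule` without showing its definition. For reference, here is a minimal sketch of the Noam-style schedule from the TensorFlow tutorial linked in the comment above (linear warmup followed by inverse-square-root decay); individual projects vary the signature, e.g. example #5 passes an extra `lr_exp` argument:

import tensorflow as tf

class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Noam schedule: lr = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5)."""

    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)                 # inverse-sqrt decay after warmup
        arg2 = step * (self.warmup_steps ** -1.5)  # linear warmup
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)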
Code example #2
File: main.py  Project: drug-ai/TensorflowSeries
def main():
    # hyperparameters
    num_layers = 4
    d_model = 128
    dff = 512
    num_heads = 8
    dropout_rate = 0.1
    epochs = 20
    pe_input, pe_target = 500, 500

    # prepare dataset
    train_dataset, val_dataset, enc_vocab_size, dec_vocab_size = get_dataset(
        trainfile='data/retrosynthesis-train.smi',
        validfile='data/retrosynthesis-valid.smi',
        n_read_threads=5, BUFFER_SIZE=20000, BATCH_SIZE=64)

    input_vocab_size = enc_vocab_size + 2
    target_vocab_size = dec_vocab_size + 2

    # build transformer model
    transformer = Transformer(num_layers, d_model, num_heads, dff,
                              input_vocab_size, target_vocab_size,
                              pe_input=pe_input,
                              pe_target=pe_target,
                              rate=dropout_rate)

    # Create optimizer
    learning_rate = CustomSchedule(d_model)
    optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    # create model checkpoint
    ckpt_manager = get_ckpt_manager(transformer, optimizer)

    # training
    # train(train_dataset, transformer, epochs, ckpt_manager, optimizer)

    # evaluating


    # predicting
    inp_sequence = "Ic1ccc2n(CC(=O)N3CCCCC3)c3CCN(C)Cc3c2c1"
    reactant = predict(transformer, inp_sequence, max_length=160)
    print('Input Product:       {}'.format(inp_sequence))
    print('Predicted Reactants: {}'.format(reactant))
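
`get_ckpt_manager` is not defined in this snippet. A minimal sketch of what such a helper might look like (the signature and checkpoint path are assumptions), built on `tf.train.Checkpoint` and `tf.train.CheckpointManager` in the same way example #5 manages checkpoints:

def get_ckpt_manager(transformer, optimizer, path='./checkpoints/train'):
    # bundle model and optimizer state so training can resume where it left off
    ckpt = tf.train.Checkpoint(transformer=transformer, optimizer=optimizer)
    return tf.train.CheckpointManager(ckpt, path, max_to_keep=5)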
Code example #3
# dummy integer token IDs for a quick forward-pass sanity check
# (token IDs must be integers for the embedding lookup; maxval is a placeholder, keep it < vocab size)
inp = tf.random.uniform((BATCH_SIZE, MAX_SEQ_LENGTH), dtype=tf.int64, minval=0, maxval=200)
tar_inp = tf.random.uniform((BATCH_SIZE, MAX_SEQ_LENGTH), dtype=tf.int64, minval=0, maxval=200)

fn_out, _ = transformer(inp, tar_inp,
                        True,
                        enc_padding_mask=None,
                        look_ahead_mask=None,
                        dec_padding_mask=None)
print(tar_inp.shape)  # (batch_size, tar_seq_len)
print(fn_out.shape)  # (batch_size, tar_seq_len, target_vocab_size)
transformer.summary()



learning_rate = CustomSchedule(d_model)

optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98,
                                     epsilon=1e-9)



loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
                from_logits=True, reduction='none')

def loss_function(real, pred):
    # mask out padding positions (token id 0) so they do not contribute to the loss
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)
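
This masked loss matches the TensorFlow transformer tutorial, which pairs it with a masked accuracy metric; a sketch of that companion function (assuming padding token id 0, as in the loss):

def accuracy_function(real, pred):
    # count a position as correct only where it is not padding
    accuracies = tf.equal(tf.cast(real, tf.int64), tf.argmax(pred, axis=2))
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    accuracies = tf.cast(tf.math.logical_and(mask, accuracies), tf.float32)
    mask = tf.cast(mask, tf.float32)
    return tf.reduce_sum(accuracies) / tf.reduce_sum(mask)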
Code example #4
# (the snippet begins mid-call: the data-loader object used below is being
#  constructed here with, among others, the following keyword arguments)
                         seq_max_len_target=SEQ_MAX_LEN_TARGET,
                         data_limit=DATA_LIMIT,
                         train_ratio=TRAIN_RATIO)

dataset, val_dataset = data_loader.load()

transformer = Transformer(inputs_vocab_size=BPE_VOCAB_SIZE,
                          target_vocab_size=BPE_VOCAB_SIZE,
                          encoder_count=ENCODER_COUNT,
                          decoder_count=DECODER_COUNT,
                          attention_head_count=ATTENTION_HEAD_COUNT,
                          d_model=D_MODEL,
                          d_point_wise_ff=D_POINT_WISE_FF,
                          dropout_prob=DROPOUT_PROB)

learning_rate = CustomSchedule(D_MODEL)
optimizer = tf.optimizers.Adam(learning_rate,
                               beta_1=0.9,
                               beta_2=0.98,
                               epsilon=1e-9)
loss_object = tf.losses.CategoricalCrossentropy(from_logits=True,
                                                reduction='none')

trainer = Trainer(
    model=transformer,
    dataset=dataset,
    loss_object=loss_object,
    optimizer=optimizer,
    batch_size=GLOBAL_BATCH_SIZE,
    distribute_strategy=strategy,
    vocab_size=BPE_VOCAB_SIZE,
)  # the original snippet is cut off here; any remaining Trainer arguments are not shown
Code example #5
def main(model_index):
    if args.dataset == 'taxi':
        flow_max = parameters_nyctaxi.flow_train_max
    elif args.dataset == 'bike':
        flow_max = parameters_nycbike.flow_train_max
    else:
        raise ValueError("Dataset should be 'taxi' or 'bike'")

    direct_test = False
    """ Model hyperparameters """
    num_layers = 4
    d_model = 64
    dff = 128
    num_heads = 8
    dropout_rate = 0.1
    cnn_layers = 3
    cnn_filters = 64
    print("num_layers: {}, d_model: {}, dff: {}, num_heads: {}, cnn_layers: {}, cnn_filters: {}" \
          .format(num_layers, d_model, dff, num_heads, cnn_layers, cnn_filters))
    """ Training settings"""
    load_saved_data = False
    save_ckpt = True
    BATCH_SIZE = 128
    MAX_EPOCHS = 500
    verbose_train = 1
    test_period = 1
    earlystop_epoch = 10
    earlystop_patience = 10
    earlystop_threshold = 1.0
    last_reshuffle_epoch = 0
    reshuffle_epochs = earlystop_patience
    reshuffle_cnt = 0
    start_from_ckpt = None
    lr_exp = 1
    warmup_steps = 4000
    print("BATCH_SIZE: {}, es_epoch: {}, patience: {}".format(
        BATCH_SIZE, earlystop_epoch, earlystop_patience))
    """ Data hyperparameters """
    num_weeks_hist = 0
    num_days_hist = 7
    num_intervals_hist = 3
    num_intervals_curr = 1
    num_intervals_before_predict = 1
    num_intervals_enc = (num_weeks_hist + num_days_hist
                         ) * num_intervals_hist + num_intervals_curr
    local_block_len = 3
    print(
        "num_weeks_hist: {}, num_days_hist: {}, num_intervals_hist: {}, num_intervals_curr: {}, num_intervals_before_predict: {}" \
        .format(num_weeks_hist, num_days_hist, num_intervals_hist, num_intervals_curr, num_intervals_before_predict))

    def result_writer(text):
        with open("results/stream_t_{}.txt".format(model_index), 'a+') as file:
            file.write(text)

    """ use mirrored strategy for distributed training """
    strategy = tf.distribute.MirroredStrategy()
    print('Number of GPU devices: {}'.format(strategy.num_replicas_in_sync))

    GLOBAL_BATCH_SIZE = BATCH_SIZE * strategy.num_replicas_in_sync

    train_dataset, val_dataset, test_dataset = \
        load_dataset(args.dataset,
                     load_saved_data,
                     GLOBAL_BATCH_SIZE,
                     num_weeks_hist,
                     num_days_hist,
                     num_intervals_hist,
                     num_intervals_curr,
                     num_intervals_before_predict,
                     local_block_len)

    train_dataset = strategy.experimental_distribute_dataset(train_dataset)
    val_dataset = strategy.experimental_distribute_dataset(val_dataset)
    test_dataset = strategy.experimental_distribute_dataset(test_dataset)

    with strategy.scope():

        loss_object = tf.keras.losses.MeanSquaredError(
            reduction=tf.keras.losses.Reduction.NONE)

        def loss_function(real, pred):
            loss_ = loss_object(real, pred)
            return tf.nn.compute_average_loss(
                loss_, global_batch_size=GLOBAL_BATCH_SIZE)

        train_rmse_1 = tf.keras.metrics.RootMeanSquaredError()
        train_rmse_2 = tf.keras.metrics.RootMeanSquaredError()
        train_rmse_3 = tf.keras.metrics.RootMeanSquaredError()
        train_rmse_4 = tf.keras.metrics.RootMeanSquaredError()

        test_rmse_1 = tf.keras.metrics.RootMeanSquaredError()
        test_rmse_2 = tf.keras.metrics.RootMeanSquaredError()
        test_rmse_3 = tf.keras.metrics.RootMeanSquaredError()
        test_rmse_4 = tf.keras.metrics.RootMeanSquaredError()

        learning_rate = CustomSchedule(d_model, lr_exp, warmup_steps)

        optimizer = tf.keras.optimizers.Adam(learning_rate,
                                             beta_1=0.9,
                                             beta_2=0.98,
                                             epsilon=1e-9)

        stream_t = Stream_T(num_layers, d_model, num_heads, dff, cnn_layers,
                            cnn_filters, 4, num_intervals_enc, dropout_rate)

        last_epoch = -1

        if save_ckpt:
            checkpoint_path = "./checkpoints/stream_t_{}".format(model_index)

            ckpt = tf.train.Checkpoint(Stream_T=stream_t, optimizer=optimizer)

            ckpt_manager = tf.train.CheckpointManager(
                ckpt,
                checkpoint_path,
                max_to_keep=(earlystop_patience + earlystop_epoch))

            ckpt_rec_flag = False

            if ckpt_manager.latest_checkpoint:
                ckpt_rec_flag = True
                if start_from_ckpt:
                    ckpt.restore(ckpt_manager.checkpoints[start_from_ckpt - 1])
                    last_epoch = start_from_ckpt
                elif len(ckpt_manager.checkpoints
                         ) >= earlystop_epoch + earlystop_patience:
                    ckpt.restore(
                        ckpt_manager.checkpoints[int(-1 - earlystop_patience /
                                                     test_period)])
                    last_epoch = len(
                        ckpt_manager.checkpoints) - earlystop_patience + 1
                elif len(ckpt_manager.checkpoints) > earlystop_epoch:
                    ckpt.restore(ckpt_manager.checkpoints[earlystop_epoch])
                    last_epoch = earlystop_epoch + 1
                else:
                    ckpt.restore(
                        ckpt_manager.checkpoints[len(ckpt_manager.checkpoints)
                                                 - 1])
                    last_epoch = len(ckpt_manager.checkpoints)
                print('Latest checkpoint restored!! At epoch {}'.format(
                    last_epoch))

        def train_step(inp, tar):
            x_hist = inp["trans_hist"]
            ex_hist = inp["ex_hist"]
            x_curr = inp["trans_curr"]
            ex_curr = inp["ex_curr"]

            ys_transitions = tar["ys_transitions"]

            with tf.GradientTape() as tape:
                predictions, _ = stream_t(x_hist,
                                          ex_hist,
                                          x_curr,
                                          ex_curr,
                                          training=True)
                loss = loss_function(ys_transitions, predictions)

            gradients = tape.gradient(loss, stream_t.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, stream_t.trainable_variables))

            train_rmse_1(ys_transitions[:, :, :, 0], predictions[:, :, :, 0])
            train_rmse_2(ys_transitions[:, :, :, 1], predictions[:, :, :, 1])
            train_rmse_3(ys_transitions[:, :, :, 2], predictions[:, :, :, 2])
            train_rmse_4(ys_transitions[:, :, :, 3], predictions[:, :, :, 3])

        def test_step(inp, tar, threshold):
            x_hist = inp["trans_hist"]
            ex_hist = inp["ex_hist"]
            x_curr = inp["trans_curr"]
            ex_curr = inp["ex_curr"]

            ys_transitions = tar["ys_transitions"]

            predictions, _ = stream_t(x_hist,
                                      ex_hist,
                                      x_curr,
                                      ex_curr,
                                      training=False)
            """ here we filter out all nodes where their real flows are less than 10 """
            real_1 = ys_transitions[:, :, :, 0]
            real_2 = ys_transitions[:, :, :, 1]
            real_3 = ys_transitions[:, :, :, 2]
            real_4 = ys_transitions[:, :, :, 3]
            pred_1 = predictions[:, :, :, 0]
            pred_2 = predictions[:, :, :, 1]
            pred_3 = predictions[:, :, :, 2]
            pred_4 = predictions[:, :, :, 3]
            mask_1 = tf.where(tf.math.greater(real_1, threshold))
            mask_2 = tf.where(tf.math.greater(real_2, threshold))
            mask_3 = tf.where(tf.math.greater(real_3, threshold))
            mask_4 = tf.where(tf.math.greater(real_4, threshold))
            masked_real_1 = tf.gather_nd(real_1, mask_1)
            masked_real_2 = tf.gather_nd(real_2, mask_2)
            masked_real_3 = tf.gather_nd(real_3, mask_3)
            masked_real_4 = tf.gather_nd(real_4, mask_4)
            masked_pred_1 = tf.gather_nd(pred_1, mask_1)
            masked_pred_2 = tf.gather_nd(pred_2, mask_2)
            masked_pred_3 = tf.gather_nd(pred_3, mask_3)
            masked_pred_4 = tf.gather_nd(pred_4, mask_4)
            test_rmse_1(masked_real_1, masked_pred_1)
            test_rmse_2(masked_real_2, masked_pred_2)
            test_rmse_3(masked_real_3, masked_pred_3)
            test_rmse_4(masked_real_4, masked_pred_4)

        @tf.function
        def distributed_test_step(inp, tar, threshold):
            # NOTE: Strategy.experimental_run_v2 was renamed to Strategy.run in TF 2.2+
            strategy.experimental_run_v2(test_step,
                                         args=(
                                             inp,
                                             tar,
                                             threshold,
                                         ))

        def evaluate(val_dataset, flow_max, epoch, verbose=1):
            threshold = 10 / flow_max

            test_rmse_1.reset_states()
            test_rmse_2.reset_states()
            test_rmse_3.reset_states()
            test_rmse_4.reset_states()

            for (batch, (inp, tar)) in enumerate(val_dataset):

                distributed_test_step(inp, tar, threshold)

                if verbose and (batch + 1) % 100 == 0:
                    print(
                        'Epoch {} ValBatch {} RMSE_1 {:.6f} RMSE_2 {:.6f} RMSE_3 {:.6f} RMSE_4 {:.6f}'
                        .format(epoch + 1, batch + 1, test_rmse_1.result(),
                                test_rmse_2.result(), test_rmse_3.result(),
                                test_rmse_4.result()))

            if verbose:
                template = 'Epoch {} Total: RMSE_1 {:.6f} RMSE_2 {:.6f} RMSE_3 {:.6f} RMSE_4 {:.6f}\n'.format(
                    epoch + 1, test_rmse_1.result(), test_rmse_2.result(),
                    test_rmse_3.result(), test_rmse_4.result())
                result_writer(template)
                print(template)

            return test_rmse_1.result(), test_rmse_2.result(
            ), test_rmse_3.result(), test_rmse_4.result()

        @tf.function
        def distributed_train_step(inp, tar):
            strategy.experimental_run_v2(train_step, args=(
                inp,
                tar,
            ))

        if direct_test:
            print("Final Test Result: ")
            _, _, _, _ = evaluate(test_dataset, flow_max, -2)
        """ Start training... """
        if not direct_test:
            earlystop_flag = False
            skip_flag = False
            earlystop_helper = early_stop_helper(earlystop_patience,
                                                 test_period, earlystop_epoch,
                                                 earlystop_threshold)
            for epoch in range(MAX_EPOCHS):

                if reshuffle_cnt < 2 and (
                        epoch - last_reshuffle_epoch) == reshuffle_epochs:
                    train_dataset, val_dataset, test_dataset = \
                        load_dataset(args.dataset,
                                     True,
                                     GLOBAL_BATCH_SIZE,
                                     num_weeks_hist,
                                     num_days_hist,
                                     num_intervals_hist,
                                     num_intervals_curr,
                                     num_intervals_before_predict,
                                     local_block_len)

                    train_dataset = strategy.experimental_distribute_dataset(
                        train_dataset)
                    val_dataset = strategy.experimental_distribute_dataset(
                        val_dataset)
                    test_dataset = strategy.experimental_distribute_dataset(
                        test_dataset)

                    last_reshuffle_epoch = epoch
                    reshuffle_epochs = int(reshuffle_epochs * 1.2)
                    reshuffle_cnt += 1

                if ckpt_rec_flag and (epoch + 1) < last_epoch:
                    skip_flag = True
                    continue

                start = time.time()

                train_rmse_1.reset_states()
                train_rmse_2.reset_states()
                train_rmse_3.reset_states()
                train_rmse_4.reset_states()

                for (batch, (inp, tar)) in enumerate(train_dataset):
                    if skip_flag:
                        break

                    distributed_train_step(inp, tar)

                    if (batch + 1) % 100 == 0 and verbose_train:
                        print(
                            'Epoch {} Batch {} RMSE_1 {:.6f} RMSE_2 {:.6f} RMSE_3 {:.6f} RMSE_4 {:.6f}'
                            .format(epoch + 1,
                                    batch + 1, train_rmse_1.result(),
                                    train_rmse_2.result(),
                                    train_rmse_3.result(),
                                    train_rmse_4.result()))

                if not skip_flag and verbose_train:
                    template = 'Epoch {} RMSE_1 {:.6f} RMSE_2 {:.6f} RMSE_3 {:.6f} RMSE_4 {:.6f}'.format(
                        epoch + 1, train_rmse_1.result(),
                        train_rmse_2.result(), train_rmse_3.result(),
                        train_rmse_4.result())
                    print(template)
                    result_writer(template + '\n')

                if (epoch + 1) > earlystop_epoch and (epoch +
                                                      1) % test_period == 0:
                    print("Validation Result: ")
                    rmse_value_1, rmse_value_2, rmse_value_3, rmse_value_4 = evaluate(
                        val_dataset, flow_max, epoch)
                    earlystop_flag = earlystop_helper.check(
                        rmse_value_1 + rmse_value_2,
                        rmse_value_3 + rmse_value_4, epoch)
                    print("Best epoch {}\n".format(
                        earlystop_helper.get_bestepoch()))
                    result_writer("Best epoch {}\n".format(
                        earlystop_helper.get_bestepoch()))

                if not skip_flag and save_ckpt and epoch % test_period == 0:
                    ckpt_save_path = ckpt_manager.save()
                    print('Saving checkpoint for epoch {} at {}\n'.format(
                        epoch + 1, ckpt_save_path))

                if not skip_flag and earlystop_flag:
                    print("Early stoping...")
                    if save_ckpt:
                        ckpt.restore(
                            ckpt_manager.checkpoints[int(-1 -
                                                         earlystop_patience /
                                                         test_period)])
                        print('Checkpoint restored!! At epoch {}'.format(
                            int(epoch + 1 - earlystop_patience / test_period)))
                    break

                skip_flag = False

                print('Time taken for 1 epoch: {} secs\n'.format(time.time() -
                                                                 start))

            print("Final Test Result: ")
            _, _, _, _ = evaluate(test_dataset, flow_max, epoch)
Code example #6
File: pretrain.py  Project: mridul911/model-zoo-1
num_layers = args.num_layers
hidden_size = args.hidden_size
dff = 4 * hidden_size
num_heads = args.num_heads
max_length = args.max_length

BATCH_SIZE = args.batch_size
EPOCHS = args.epochs

########### Define Tokenizer ############
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

input_vocab_size = len(tokenizer.vocab)

########### Define learning rate and optimizer ########
learning_rate = CustomSchedule(hidden_size)

optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-9) # values as in the paper

########### Define Loss function #######
bce_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)  # for NSP (next-sentence prediction)
sce_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)  # for MLM (masked language modeling)

def loss_function(nsp, mlm, is_next, seg_input, masked):
  nsp_result = bce_loss(is_next, nsp)

  mlm_result = 0
  for i in range(len(masked)):
    seg_val = 0
    for j in range(len(masked[i])):
      if(seg_input[i][j] < seg_val):
Code example #7
# `inp` is used below but not defined in the original snippet; it is assumed to be
# random integer token IDs like `tar_inp` (maxval is a placeholder, keep it < vocab size)
inp = tf.random.uniform((BATCH_SIZE, MAX_SEQ_LENGTH), dtype=tf.int64, minval=0, maxval=200)
tar_inp = tf.random.uniform((BATCH_SIZE, MAX_SEQ_LENGTH), dtype=tf.int64, minval=0, maxval=200)
fn_out, _ = transformer(inp, tar_inp,
                        True,
                        enc_padding_mask=None,
                        look_ahead_mask=None,
                        dec_padding_mask=None)
print(tar_inp.shape)  # (batch_size, tar_seq_len)
print(fn_out.shape)  # (batch_size, tar_seq_len, target_vocab_size) 

# init bert pre-trained weights
transformer.restore_encoder(bert_ckpt_file)
transformer.summary()



learning_rate = CustomSchedule(config.d_model)

optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98,
                                     epsilon=1e-9)



loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    # mask out padding positions (token id 0) so they do not contribute to the loss
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)