Example #1
def create_model(params, is_train):
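    # Builds the Keras functional wrapper around the Transformer.
    # Training graph: takes (inputs, targets), optionally routes the logits
    # through MetricLayer, casts them to float32, and attaches the
    # label-smoothed cross-entropy via add_loss().
    # Inference graph: takes inputs only and returns decoded outputs and scores.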
    with tf.name_scope('model'):
        if is_train:
            inputs = layers.Input((None, ), dtype=tf.int64, name='inputs')
            targets = layers.Input((None, ), dtype=tf.int64, name='targets')
            internal_model = Transformer(params, name='transformer')
            logits = internal_model([inputs, targets], training=is_train)
            vocab_size = params['vocab_size']
            label_smoothing = params['label_smoothing']
            if params['enable_metrics_in_training']:
                logits = metrics.MetricLayer(vocab_size)([logits, targets])
            logits = layers.Lambda(lambda x: x,
                                   name='logits',
                                   dtype=tf.float32)(logits)
            model = Model([inputs, targets], logits)
            # TODO: Can we do this loss in float16 instead of float32?
            loss = metrics.transformer_loss(logits, targets, label_smoothing,
                                            vocab_size)
            model.add_loss(loss)
            return model

        else:
            inputs = layers.Input((None, ), dtype=tf.int64, name='inputs')
            internal_model = Transformer(params, name='transformer')
            ret = internal_model([inputs], training=is_train)
            outputs, scores = ret['outputs'], ret['scores']
            return Model(inputs, [outputs, scores])
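
# A minimal usage sketch for create_model() above -- illustrative only, not
# part of the original example. It assumes a hypothetical `params` dict with
# the keys read by the function, plus placeholder dataset/array arguments.
import tensorflow as tf

def sketch_train_and_infer(params, train_dataset, encoded_inputs):
    # Training graph: the loss is already attached inside create_model via
    # add_loss(), so compile() only needs an optimizer.
    train_model = create_model(params, is_train=True)
    train_model.compile(tf.keras.optimizers.Adam(1e-4))
    train_model.fit(train_dataset, epochs=1)

    # Inference graph: single input, returns decoded token ids and their scores.
    infer_model = create_model(params, is_train=False)
    outputs, scores = infer_model.predict(encoded_inputs)
    return outputs, scores
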
    def generator_loss(self, fake_output, generated_output, targets):
        # adversarial term: how well the generated batch fools the discriminator
        discriminator_loss = cross_entropy(tf.ones_like(fake_output),
                                           fake_output)
        # sequence term: label-smoothed cross-entropy against the real targets
        logits = metrics.MetricLayer(
            self.vocab_size)([generated_output, targets])
        logits, generator_loss = metrics.LossLayer(self.vocab_size,
                                                   0.1)([logits, targets])
        return discriminator_loss + generator_loss, generator_loss
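
    # Hypothetical sketch of how generator_loss() might be driven in a single
    # GAN-style update step (not from the original source); `self.generator`,
    # `self.discriminator`, and `self.generator_optimizer` are assumed to
    # exist on the same object.
    def sketch_generator_step(self, inputs, targets):
        with tf.GradientTape() as tape:
            generated_output = self.generator(inputs, training=True)
            fake_output = self.discriminator(generated_output, training=True)
            total_loss, gen_loss = self.generator_loss(fake_output,
                                                       generated_output,
                                                       targets)
        grads = tape.gradient(total_loss, self.generator.trainable_weights)
        self.generator_optimizer.apply_gradients(
            zip(grads, self.generator.trainable_weights))
        return gen_loss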
    def test_basic_simpleSeq2Seq(self):
        trace_path = "checkpoints_tl/logging/loss"
        vocabulary_size = 64
        emb_dim = 32
        model_ = Seq2seqLuongAttention(hidden_size=128,
                                       embedding_layer=tl.layers.Embedding(
                                           vocabulary_size=vocabulary_size,
                                           embedding_size=emb_dim),
                                       cell=tf.keras.layers.GRUCell,
                                       method="dot")

        # print(", ".join(x for x in [t.name for t in model_.trainable_weights]))

        self.vocab_size = 64
        optimizer = tf.optimizers.Adam(learning_rate=0.01)
        for epoch in range(self.num_epochs):
            model_.train()
            t = time.time()
            trainX, trainY = shuffle(self.trainX, self.trainY)
            total_loss, n_iter = 0, 0
            for X, Y in tqdm(tl.iterate.minibatches(inputs=trainX,
                                                    targets=trainY,
                                                    batch_size=self.batch_size,
                                                    shuffle=False),
                             total=self.n_step,
                             desc='Epoch[{}/{}]'.format(
                                 epoch + 1, self.num_epochs),
                             leave=False):

                with tf.GradientTape() as tape:
                    dec_seq = Y[:, :-1]   # decoder input: target shifted right
                    targets = Y[:, 1:]    # labels: target shifted left
                    logits = model_(inputs=[X, dec_seq])
                    logits = metrics.MetricLayer(
                        self.vocab_size)([logits, targets])
                    logits, loss = metrics.LossLayer(self.vocab_size,
                                                     0.1)([logits, targets])

                # log the per-batch loss in text mode, then apply the update
                with tf.io.gfile.GFile(trace_path, "a") as trace_file:
                    trace_file.write(str(loss.numpy()) + '\n')
                grad = tape.gradient(loss, model_.all_weights)
                optimizer.apply_gradients(zip(grad, model_.all_weights))

                total_loss += loss
                n_iter += 1
            print(time.time() - t)
            # tl.files.save_npz(model_.all_weights, name='./model_v4.npz')
            model_.eval()
            test_sample = trainX[0:2, :]
            prediction = model_(inputs=[test_sample], seq_length=10, sos=0)
            print("Prediction: >>>>>  ", prediction, "\n Target: >>>>>  ",
                  trainY[0:2, 1:], "\n\n")

            print('Epoch [{}/{}]: loss {:.4f}'.format(epoch + 1,
                                                      self.num_epochs,
                                                      total_loss / n_iter))
    def train_step(inputs, targets):
        model.train()
        with tf.GradientTape() as tape:
            logits = model(inputs=inputs, targets=targets)
            logits = metrics.MetricLayer(params.vocab_size)([logits, targets])
            logits, loss = metrics.LossLayer(params.vocab_size,
                                             0.1)([logits, targets])

        gradients = tape.gradient(loss, model.all_weights)
        optimizer_.apply_gradients(zip(gradients, model.all_weights))
        return loss
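
    # The variant below is the Keras-style counterpart of the step above: it
    # passes training=True explicitly and differentiates with respect to
    # model.trainable_weights instead of TensorLayer's model.train() /
    # model.all_weights pair.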
    def train_step(inputs, targets):
        with tf.GradientTape() as tape:
            logits = model(inputs=[inputs, targets], training=True)
            logits = metrics.MetricLayer(
                params["vocab_size"])([logits, targets])
            logits, loss = metrics.LossLayer(params["vocab_size"],
                                             0.1)([logits, targets])

        gradients = tape.gradient(loss, model.trainable_weights)
        optimizer_.apply_gradients(zip(gradients, model.trainable_weights))
        return loss
    def train_step(inputs, targets):
        model.train()
        with tf.GradientTape() as tape:
            # shift the targets one step right (pad with 0) to form the
            # decoder inputs
            decoder_inputs = tf.pad(targets, [[0, 0], [1, 0]])[:, :-1]
            logits = model(inputs=[inputs, decoder_inputs])
            logits = metrics.MetricLayer(params.vocab_size)([logits, targets])
            logits, loss = metrics.LossLayer(params.vocab_size,
                                             0.1)([logits, targets])

        gradients = tape.gradient(loss, model.all_weights)
        optimizer_.apply_gradients(zip(gradients, model.all_weights))
        return loss
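
    # Illustrative driver for the train_step() variants above (not from the
    # original source): it assumes an iterable `dataset` yielding
    # (inputs, targets) batches in the same enclosing scope as train_step.
    def sketch_run_epochs(dataset, num_epochs=1):
        for epoch in range(num_epochs):
            for step, (inputs, targets) in enumerate(dataset):
                loss = train_step(inputs, targets)
                if step % 100 == 0:
                    print('epoch {} step {} loss {:.4f}'.format(
                        epoch + 1, step, float(loss)))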
    def test_basic_simpleSeq2Seq(self):
        model_ = Transformer(TINY_PARAMS)
        self.vocab_size = TINY_PARAMS.vocab_size
        optimizer = tf.optimizers.Adam(learning_rate=0.01)

        for epoch in range(self.num_epochs):
            model_.train()
            t = time.time()
            trainX, trainY = shuffle(self.trainX, self.trainY)
            total_loss, n_iter = 0, 0
            for X, Y in tqdm(tl.iterate.minibatches(inputs=trainX,
                                                    targets=trainY,
                                                    batch_size=self.batch_size,
                                                    shuffle=False),
                             total=self.n_step,
                             desc='Epoch[{}/{}]'.format(
                                 epoch + 1, self.num_epochs),
                             leave=False):

                with tf.GradientTape() as tape:

                    targets = Y
                    logits = model_(inputs=X, targets=Y)
                    logits = metrics.MetricLayer(
                        self.vocab_size)([logits, targets])
                    logits, loss = metrics.LossLayer(self.vocab_size,
                                                     0.1)([logits, targets])

                grad = tape.gradient(loss, model_.all_weights)
                optimizer.apply_gradients(zip(grad, model_.all_weights))

                total_loss += loss
                n_iter += 1
            print(time.time() - t)
            model_.eval()
            test_sample = trainX[0:2, :]
            prediction = model_(inputs=test_sample)
            print("Prediction: >>>>>  ", prediction["outputs"],
                  "\n Target: >>>>>  ", trainY[0:2, :], "\n\n")

            print('Epoch [{}/{}]: loss {:.4f}'.format(epoch + 1,
                                                      self.num_epochs,
                                                      total_loss / n_iter))
    def test_basic_simpleSeq2Seq(self):
        trace_path = "checkpoints_tl/logging/loss"
        model_ = Transformer(TINY_PARAMS)

        # print(", ".join(x for x in [t.name for t in model_.trainable_weights]))

        self.vocab_size = TINY_PARAMS.vocab_size
        optimizer = tf.optimizers.Adam(learning_rate=0.01)
        for epoch in range(self.num_epochs):
            model_.train()
            t = time.time()
            trainX, trainY = shuffle(self.trainX, self.trainY)
            total_loss, n_iter = 0, 0
            for X, Y in tqdm(tl.iterate.minibatches(inputs=trainX,
                                                    targets=trainY,
                                                    batch_size=self.batch_size,
                                                    shuffle=False),
                             total=self.n_step,
                             desc='Epoch[{}/{}]'.format(
                                 epoch + 1, self.num_epochs),
                             leave=False):

                with tf.GradientTape() as tape:
                    targets = Y
                    logits = model_(inputs=X, targets=Y)
                    logits = metrics.MetricLayer(
                        self.vocab_size)([logits, targets])
                    logits, loss = metrics.LossLayer(self.vocab_size,
                                                     0.1)([logits, targets])

                # log the per-batch loss in text mode, then apply the update
                with tf.io.gfile.GFile(trace_path, "a") as trace_file:
                    trace_file.write(str(loss.numpy()) + '\n')
                grad = tape.gradient(loss, model_.all_weights)
                optimizer.apply_gradients(zip(grad, model_.all_weights))

                total_loss += loss
                n_iter += 1
            print(time.time() - t)
            tl.files.save_npz(model_.all_weights, name='./model_v4.npz')
            model_.eval()
            test_sample = trainX[0:2, :]
            prediction = model_(inputs=test_sample)
            print("Prediction: >>>>>  ", prediction["outputs"],
                  "\n Target: >>>>>  ", trainY[0:2, :], "\n\n")

            print('Epoch [{}/{}]: loss {:.4f}'.format(epoch + 1,
                                                      self.num_epochs,
                                                      total_loss / n_iter))
    def test_basic_simpleSeq2Seq(self):
        model_ = Transformer(params)
        self.vocab_size = params["vocab_size"]

        # Alternative optimizers, e.g. optimizer.LazyAdam(learning_rate,
        # beta_1=0.9, beta_2=0.98, epsilon=1e-9) or tf.keras.optimizers.Adam
        # with the same settings, are omitted here; the test runs plain Adam
        # with a fixed learning rate, so the CustomSchedule below is created
        # but not used.
        learning_rate = CustomSchedule(params["hidden_size"])
        optimizer_ = tf.optimizers.Adam(learning_rate=0.01)

        for epoch in range(self.num_epochs):
            trainX, trainY = shuffle(self.trainX, self.trainY)
            total_loss, n_iter = 0, 0
            for X, Y in tqdm(tl.iterate.minibatches(inputs=trainX,
                                                    targets=trainY,
                                                    batch_size=self.batch_size,
                                                    shuffle=False),
                             total=self.n_step,
                             desc='Epoch[{}/{}]'.format(
                                 epoch + 1, self.num_epochs),
                             leave=False):

                with tf.GradientTape() as tape:
                    targets = Y
                    output = model_(inputs=[X, Y], training=True)
                    logits = metrics.MetricLayer(
                        self.vocab_size)([output, targets])
                    logits, loss = metrics.LossLayer(self.vocab_size,
                                                     0.1)([logits, targets])

                # apply the recorded gradients to the trainable weights
                grad = tape.gradient(loss, model_.trainable_weights)
                optimizer_.apply_gradients(
                    zip(grad, model_.trainable_weights))
                total_loss += loss
                n_iter += 1

            test_sample = trainX[0:2, :]

            top_n = 1
            for i in range(top_n):
                prediction = model_(inputs=[test_sample], training=False)
                print("Prediction: >>>>>  ", prediction["outputs"],
                      "\n Target: >>>>>  ", trainY[0:2, :], "\n\n")

            # printing average loss after every epoch
            print('Epoch [{}/{}]: loss {:.4f}'.format(epoch + 1,
                                                      self.num_epochs,
                                                      total_loss / n_iter))