Ejemplo n.º 1
0
def tester(model, configs, eager=False, plotAttention=False, with_trainer=True):
    """Generate a fresh test set, run inference on each sample and print the accuracy.

    For every generated encoder input this prints the sequence, the expected
    (sorted) sequence and the prediction returned by ``runSeq2SeqInference``,
    then counts the sample as correct when every expected position matches
    the prediction.

    Args:
        model: Model handed unchanged to ``runSeq2SeqInference``.
        configs: Mapping providing the keys ``num_samples_tests``,
            ``sample_length``, ``min_value``, ``max_value``, ``SOS_CODE``,
            ``EOS_CODE``, ``vocab_size`` and ``input_length``.
        eager: Forwarded unchanged to ``runSeq2SeqInference``.
        plotAttention: When truthy, the attention weights of every sample are
            passed to the module-level ``plotAttention`` function.
        with_trainer: Forwarded unchanged to ``runSeq2SeqInference``.

    Raises:
        NameError: If ``plotAttention`` is truthy but the module defines no
            callable named ``plotAttention``.
    """
    num_samples_tests = configs['num_samples_tests']
    correctPredictions = 0
    wrongPredictions = 0

    # The boolean ``plotAttention`` parameter shadows the module-level plotting
    # function of the same name, so calling ``plotAttention(...)`` here would
    # call a bool. Resolve the real function through the module globals.
    attentionPlotter = None
    if plotAttention:
        attentionPlotter = globals().get('plotAttention')
        if not callable(attentionPlotter):
            raise NameError(
                "plotAttention=True requires a module-level plotAttention() function")

    testEncoderInput, _, _ = generateDataset(
        configs['num_samples_tests'],
        configs['sample_length'],
        configs['min_value'],
        configs['max_value'],
        configs['SOS_CODE'],
        configs['EOS_CODE'],
        configs['vocab_size'])
    for inputEntry in testEncoderInput:
        print('__________________________________________________')

        # Print the number sequence without its first element.
        # NOTE(review): the original comment calls the dropped token "EOS";
        # being at index 0 it may actually be SOS - confirm in generateDataset.
        sequence = list(inputEntry.numpy().astype("int16"))[1:]
        print(sequence)

        # The expected answer is the same sequence in ascending order.
        correctAnswer = sorted(sequence)
        print(correctAnswer)

        # Add the batch dimension [batch=1, features]
        inputEntry = tf.expand_dims(inputEntry, 0)

        # Run the inference and generate predicted output
        predictedAnswer, attention_weights = runSeq2SeqInference(
            model,
            inputEntry,
            configs['vocab_size'],
            configs['input_length'],
            configs['max_value'],
            configs['SOS_CODE'],
            configs['EOS_CODE'],
            eager,
            with_trainer)
        if attentionPlotter is not None:
            attentionPlotter(attention_weights, inputEntry)
        print(predictedAnswer)

        # A prediction is correct when it matches the expected answer at every
        # expected position. A prediction that is too short cannot be correct
        # (and must not raise IndexError); extra trailing items are ignored.
        isCorrect = len(predictedAnswer) >= len(correctAnswer) and all(
            expected == predicted
            for expected, predicted in zip(correctAnswer, predictedAnswer))

        if isCorrect:
            correctPredictions += 1
            print('______________________OK__________________________')
        else:
            wrongPredictions += 1
            print('_____________________WRONG!_______________________')

    # Avoid ZeroDivisionError when the configuration requests zero test samples.
    totalSamples = num_samples_tests or 1
    print(
        f"Correct Predictions: {correctPredictions/totalSamples} || Wrong Predictions: {wrongPredictions/totalSamples}")
Ejemplo n.º 2
0
def main(plotAttention=False, maskingModel=True) -> None:
    """Train an eager seq2seq model with a manual gradient loop, then test it.

    Reads the configuration from ``sys.argv`` via ``get_configs``, generates a
    training set, builds either ``EagerMasking`` or ``EagerVanilla``, trains it
    batch by batch with Adam and categorical cross-entropy, prints the average
    loss of every epoch and finally calls ``tester``.

    Args:
        plotAttention: Accepted for interface compatibility; not used by this
            function.
        maskingModel: When true build ``EagerMasking``, otherwise
            ``EagerVanilla``.
    """
    # Get the configs
    configs = get_configs(sys.argv)

    print('Generating Dataset...')
    # generate training dataset
    trainEncoderInput, _, trainDecoderOutput = generateDataset(
        configs['num_samples_training'], configs['sample_length'],
        configs['min_value'], configs['max_value'], configs['SOS_CODE'],
        configs['EOS_CODE'], configs['vocab_size'])

    print('Dataset Generated!')

    loss_fn = tf.losses.CategoricalCrossentropy()
    optimizer = tf.optimizers.Adam()

    if not maskingModel:
        model = EagerVanilla(configs['input_length'], configs['vocab_size'],
                             configs['embedding_dims'], configs['lstm_units'],
                             configs['SOS_CODE'])
    else:
        model = EagerMasking(configs['input_length'], configs['vocab_size'],
                             configs['embedding_dims'], configs['lstm_units'],
                             configs['SOS_CODE'])

    # Call the model once on the full training input before the training loop.
    # NOTE(review): presumably this builds the model's variables - confirm.
    model(trainEncoderInput)

    losses = []

    print('Training...')
    batch_size = configs['batch_size']
    # Only full batches are used; a trailing partial batch is dropped.
    num_batches = int(configs['num_samples_training'] / batch_size)
    for epoch in range(configs['num_epochs']):
        loss_per_epoch = []
        # Visit every full batch. The previous ``range(num_batches - 1)``
        # silently skipped the last one (and trained on nothing when there
        # was exactly one batch).
        for i in range(num_batches):
            batch_start = i * batch_size
            batch_end = batch_start + batch_size
            enc_in_batch = trainEncoderInput[batch_start:batch_end]
            dec_out_batch = trainDecoderOutput[batch_start:batch_end]

            # Only the forward pass and the loss need to be recorded on the tape.
            with tf.GradientTape() as tape:
                predicted = model(enc_in_batch)
                loss = loss_fn(dec_out_batch, predicted)

            # Store the loss
            loss_per_epoch.append(loss)

            grads = tape.gradient(loss, model.trainable_variables)

            optimizer.apply_gradients(zip(grads, model.trainable_variables))

        epoch_loss = np.asarray(loss_per_epoch).mean()
        print(f"Epoch: {epoch+1} avg. loss: {epoch_loss}")
        losses.append(epoch_loss)

    print('Testing...')
    tester(model, configs, eager=True, with_trainer=False)
Ejemplo n.º 3
0
    def test_generate(self):
        """generateDataset returns three collections, each holding one entry per requested sample."""
        sample_count = 2  # how many samples to request
        sequence_length = 10  # length of each input sequence
        upper_bound = 100  # upper bound for the generated numbers
        vocabulary_size = upper_bound + 2  # two extra ids for SOS and EOS

        encoder_input, decoder_input, decoder_output = generateDataset(
            sample_count, sequence_length, upper_bound, vocabulary_size)

        # Every returned collection must contain exactly the two requested samples.
        for generated in (encoder_input, decoder_input, decoder_output):
            self.assertEqual(len(generated), 2)
Ejemplo n.º 4
0
def runner():
    """Generate a dataset, print it, and push it once through Encoder and Decoder.

    Reads ``num_samples``, ``sample_length``, ``max_value``, ``vocab_size``,
    ``embeddingDims`` and ``lstmUnits`` from the enclosing module scope.

    Returns:
        Always ``1``.
    """
    enc_inputs, dec_inputs, dec_targets = generateDataset(
        num_samples, sample_length, max_value, vocab_size)

    # Dump the three generated tensors for inspection.
    for generated in (enc_inputs, dec_inputs, dec_targets):
        print(generated)

    # Encode the input sequence; the encoder yields all hidden states plus the
    # final hidden and carry states.
    enc_model = Encoder(vocab_size, embeddingDims, lstmUnits)
    all_hidden, last_hidden, last_carry = enc_model(enc_inputs)

    # Decode, seeded with the encoder's final states; the output is discarded.
    dec_model = Decoder(vocab_size, embeddingDims, lstmUnits)
    initial_state = [last_hidden, last_carry]
    _ = dec_model(dec_inputs, initial_state, all_hidden)

    return 1
Ejemplo n.º 5
0
def main(plotAttention=False) -> None:
    """Build a model through ``model_factory``, fit it, and run ``tester`` on it.

    The configuration is read from ``sys.argv`` via ``get_configs``. Training
    and validation datasets are generated with identical settings apart from
    the sample count.

    Args:
        plotAttention: Accepted but not used by this function.
    """
    cfg = get_configs(sys.argv)

    def build_split(count_key):
        # Generate one dataset split whose size is taken from cfg[count_key].
        return generateDataset(
            cfg[count_key], cfg['sample_length'],
            cfg['min_value'], cfg['max_value'], cfg['SOS_CODE'],
            cfg['EOS_CODE'], cfg['vocab_size'])

    print('Generating Dataset...')
    train_enc_in, train_dec_in, train_dec_out = build_split('num_samples_training')
    val_enc_in, val_dec_in, val_dec_out = build_split('num_samples_validation')
    print('Dataset Generated!')

    # Instantiate the model selected by name and show its layer summary.
    net = model_factory(cfg['model_name'], cfg['vocab_size'],
                        cfg['input_length'], cfg['embedding_dims'],
                        cfg['lstm_units'])
    net.summary(line_length=180)

    print('Training...')
    validation_split = ([val_enc_in, val_dec_in], val_dec_out)
    net.fit(
        x=[train_enc_in, train_dec_in],
        y=train_dec_out,
        epochs=cfg['num_epochs'],
        batch_size=cfg['batch_size'],
        shuffle=True,
        validation_data=validation_split,
    )

    print('Testing...')
    tester(net, cfg)
Ejemplo n.º 6
0
    def test_tensorContents(self):
        """The decoder input of a sample equals its encoder input sorted, ignoring position 0."""
        sample_count = 2  # how many samples to request
        sequence_length = 10  # length of each input sequence
        upper_bound = 100  # upper bound for the generated numbers
        vocabulary_size = upper_bound + 2  # two extra ids for SOS and EOS

        encoder_batch, decoder_batch, _decoder_targets = generateDataset(
            sample_count, sequence_length, upper_bound, vocabulary_size)

        first_encoder_row = encoder_batch[0].numpy()
        first_decoder_row = decoder_batch[0].numpy()

        # Skip the leading token of both rows, then compare the sorted encoder
        # values with the decoder values element by element.
        expected_sorted = np.sort(first_encoder_row[1:])
        decoder_values = first_decoder_row[1:]
        self.assertTrue(np.all(expected_sorted == decoder_values))
Ejemplo n.º 7
0
def train(EPOCHS=2, batch_size=128):
    """Generate a dataset and run ``train_step`` over it for ``EPOCHS`` epochs.

    Uses the module-level ``configs``, ``train_loss``, ``train_accuracy`` and
    ``train_step``. Progress is printed every 50 batches and once per epoch.

    Args:
        EPOCHS: Number of passes over the generated dataset.
        batch_size: Number of samples handed to each ``train_step`` call.
    """
    print('Generating dataset...')
    enc_input, dec_input, dec_out = generateDataset(
        configs['num_samples_training'], configs['sample_length'],
        configs['min_value'], configs['max_value'], configs['SOS_CODE'],
        configs['EOS_CODE'], configs['vocab_size'])
    print('Dataset generated!')

    # Only full batches are processed; a trailing partial batch is dropped.
    num_batches = int(len(enc_input) / batch_size)

    for epoch in range(EPOCHS):
        start = time.time()

        # Start every epoch with fresh metric accumulators.
        train_loss.reset_states()
        train_accuracy.reset_states()

        for batch in range(num_batches):
            window = slice(batch * batch_size, (batch + 1) * batch_size)

            # Only the encoder input and the decoder target are fed to the
            # training step; dec_input is not used.
            train_step(enc_input[window], dec_out[window])

            report_progress = batch % 50 == 0
            if report_progress:
                print('Epoch {} Batch {} Loss {:.6f} Accuracy {:.6f}'.format(
                    epoch + 1, batch, train_loss.result(),
                    train_accuracy.result()))

        # Checkpoint saving is currently disabled.
        print('Epoch {} Loss {:.6f} Accuracy {:.6f}'.format(
            epoch + 1, train_loss.result(), train_accuracy.result()))