import numpy as np
import pandas as pd
from PIL import Image

from models import DCGAN  # module paths assumed; adjust to the project layout
from utils.dataset_utils import load_dataset


def generate_mode():
    img_size = (64, 64, 3)
    img_path = './emoji/edited/emoji_64x64/'
    txt_path = './emoji/description/detailed'
    glove_path = './utils/glove.6B.300d.txt'

    dcgan = DCGAN(img_path, txt_path, glove_path)
    # a near-zero split_rate pushes (effectively) the whole dataset into X_train
    X_train, Captions, _, _, _ = load_dataset(img_path, txt_path, img_size, split_rate=0.0000000000000000001)
    bs = X_train.shape[0]
    print('Loading model...')
    dcgan.load_model()

    iteration = 0
    caption_list = []
    print('Generating images...')
    for image, caption in zip(X_train, Captions):
        # de-normalize from [-1, 1] back to the uint8 pixel range
        edited_image = image * 127.5 + 127.5
        edited_image = Image.fromarray(edited_image.astype(np.uint8))
        edited_image.save('./images/original/' + str(iteration) + '.png')
        generated_image = dcgan.generate_image_from_text(caption)
        generated_image.save('./images/output/' + str(iteration) + '.png')
        caption_list.append([str(caption)])
        iteration += 1

    df = pd.DataFrame(caption_list, columns=['caption'])
    df.to_csv('./images/caption.csv')

    # plot all emojis
    dcgan.save_imgs(epoch=5000, texts=Captions, batch_size=bs)
    print('Done!')
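For context, the load_dataset call above implies roughly this interface. The following is a minimal sketch, assuming captions live in .txt files whose basenames match the image files; the real utils.dataset_utils.load_dataset may differ:

import os
import numpy as np
from PIL import Image

def load_dataset(img_path, txt_path, img_size, split_rate=0.1):
    # hypothetical reconstruction of the loader interface used above
    names = sorted(f for f in os.listdir(img_path) if f.endswith('.png'))
    images, captions = [], []
    for name in names:
        img = Image.open(os.path.join(img_path, name)).convert('RGB')
        img = img.resize((img_size[0], img_size[1]))
        # scale to [-1, 1] so that `x * 127.5 + 127.5` recovers uint8 pixels
        images.append(np.asarray(img, dtype=np.float32) / 127.5 - 1.0)
        with open(os.path.join(txt_path, os.path.splitext(name)[0] + '.txt')) as fh:
            captions.append(fh.read().strip())
    images, captions = np.stack(images), np.asarray(captions)
    n_test = int(len(images) * split_rate)  # a near-zero rate yields 0 test samples
    labels = None  # per-emoji class labels are omitted in this sketch
    return images[n_test:], captions[n_test:], images[:n_test], captions[:n_test], labels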
Example #2
from utils.dataset_utils import load_dataset
from utils.model_utils import add_categorical_loss
from models import classification_model, createNaiveModel
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import losses
from tensorflow.keras import metrics
import tensorflow_federated as tff
import tensorflow as tf
import numpy as np
import os, collections

tf.keras.backend.set_floatx('float32')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress unneeded TensorFlow log output

dataset_train = load_dataset(DATASET_FILENAME)
# expect a federated ClientData object wrapping one tf.data.Dataset per client
print(len(dataset_train.client_ids), dataset_train.output_types,
      dataset_train.output_shapes)

example_dataset = dataset_train.create_tf_dataset_for_client(
    dataset_train.client_ids[0])
example_element = next(iter(example_dataset))

print(example_element['label'].numpy())
print(example_element['pixels'].numpy().shape)


def preprocess(dataset):
    def element_fn(element):
        return collections.OrderedDict([
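The example is cut off at this point. Given the 'pixels' and 'label' keys printed above, element_fn plausibly continues along the lines of the standard TFF EMNIST preprocessing; the following completion is a sketch, not necessarily the author's exact code:

def preprocess(dataset):
    def element_fn(element):
        # flatten the image into a feature vector and wrap the label,
        # following the usual TFF EMNIST layout (an assumption)
        return collections.OrderedDict([
            ('x', tf.reshape(element['pixels'], [-1])),
            ('y', tf.reshape(element['label'], [1])),
        ])
    return dataset.map(element_fn)  # shuffle/batch parameters omitted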
Example #3
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='Test batch size')
    parser.add_argument('--subset',
                        default='test',
                        choices=['train', 'dev', 'test'],
                        help='Dataset on which to run test.')
    parser.add_argument('--max_batches_count',
                        type=int,
                        default=None,
                        help='Maximal batches count to limit the test.')
    parser.add_argument('--beam_size', type=int, default=3, help='Beam size.')
    parser.add_argument('--verbose',
                        action='store_true',
                        help='Output predictions and targets.')
    args = parser.parse_args()
    test_data = load_dataset(Path(args.data_dir), subset=args.subset)
    if torch.cuda.is_available():
        device = 'cuda'
        logger.debug('Using CUDA')
    else:
        device = 'cpu'

    encoder = IPAEncoder(args.data_dir)
    PER = run_test(test_data,
                   device,
                   encoder.vocab,
                   args.checkpoint,
                   args.batch_size,
                   args.max_batches_count,
                   encoder if args.verbose else None,
                   beam_size=args.beam_size)
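For reference, driving the same harness without the CLI might look like this; the data and checkpoint paths are placeholders, and load_dataset, run_test, and IPAEncoder come from the surrounding project:

from pathlib import Path

data_dir = Path('data/ipa')              # placeholder path
test_data = load_dataset(data_dir, subset='test')
encoder = IPAEncoder(str(data_dir))
per = run_test(test_data, 'cpu', encoder.vocab,
               'checkpoints/best.pt',    # placeholder for args.checkpoint
               64,                       # batch size
               None,                     # no limit on batches
               None,                     # pass the encoder only for verbose output
               beam_size=3)
print('PER:', per)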
Example #4
    def train(self, epochs, batch_size=26, save_interval=20):
        # load dataset
        X_train, Captions, X_test, Captions_test, Labels = load_dataset(self.img_path, self.txt_path, self.img_shape)
        caption_list_train = []
        caption_list_test = []
        for caption in Captions:
            caption_list_train.append([str(caption)])
        for caption in Captions_test:
            caption_list_test.append([str(caption)])
        df = pd.DataFrame(caption_list_train, columns=['caption'])
        df.to_csv('./saved_model/caption_train.csv')
        df = pd.DataFrame(caption_list_test, columns=['caption'])
        df.to_csv('./saved_model/caption_test.csv')

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        batch_count = X_train.shape[0] // batch_size
        history = []
        history_test = []

        for epoch in range(epochs):
            for batch_index in range(batch_count):
                # ---------------------
                #  Train Discriminator
                # ---------------------

                # Select a random half of images
                # idx = np.random.randint(0, X_train.shape[0], batch_size)
                imgs = X_train[batch_index * batch_size:(batch_index + 1) * batch_size]
                texts_input = Captions[batch_index * batch_size:(batch_index + 1) * batch_size]
                texts = self.glove_model.encode_docs(texts_input)

                # Sample noise and generate a batch of new images
                noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
                gen_imgs = self.generator.predict([noise, texts])

                # Train the discriminator (real classified as ones and generated as zeros)
                start = time.time()
                d_loss_real = self.discriminator.train_on_batch([imgs, texts], valid)
                d_loss_fake = self.discriminator.train_on_batch([gen_imgs, texts], fake)
                batch_time_d = time.time() - start
                d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

                # ---------------------
                #  Train Generator
                # ---------------------

                # Train the generator (wants discriminator to mistake images as real)
                start = time.time()
                g_loss = self.combined.train_on_batch([noise, texts], valid)
                batch_time_g = time.time() - start

                # Plot the progress
                batch_time = batch_time_d + batch_time_g
                print ("%d-%d [D loss: %f, acc.: %.2f%%] [G loss: %f] [Time: %f]" % (epoch, batch_index, d_loss[0], 100*d_loss[1], g_loss, batch_time))
                history.append([epoch, batch_index, d_loss[0], 100*d_loss[1], g_loss, batch_time])
            
            # Test the model; size the ground truths and noise to the test
            # split, which need not equal batch_size
            valid_test = np.ones((X_test.shape[0], 1))
            fake_test = np.zeros((X_test.shape[0], 1))
            texts_test = self.glove_model.encode_docs(Captions_test)
            noise_test = np.random.normal(0, 1, (X_test.shape[0], self.latent_dim))
            gen_imgs_test = self.generator.predict([noise_test, texts_test])
            start = time.time()
            d_loss_real_test = self.discriminator.test_on_batch([X_test, texts_test], valid_test)
            d_loss_fake_test = self.discriminator.test_on_batch([gen_imgs_test, texts_test], fake_test)
            batch_time_d_test = time.time() - start
            d_loss_test = 0.5 * np.add(d_loss_real_test, d_loss_fake_test)
            start = time.time()
            g_loss_test = self.combined.test_on_batch([noise_test, texts_test], valid_test)
            batch_time_g_test = time.time() - start

            # Plot the test progress
            batch_time_test = batch_time_d_test + batch_time_g_test
            print ("%d (test) [D loss: %f, acc.: %.2f%%] [G loss: %f] [Time: %f]" % (epoch, d_loss_test[0], 100*d_loss_test[1], g_loss_test, batch_time_test))
            history_test.append([epoch, d_loss_test[0], 100*d_loss_test[1], g_loss_test, batch_time_test])

            # If at save interval => save generated image samples & training weights
            if epoch % save_interval == 0:
                idx = np.random.randint(0, X_train.shape[0], batch_size)
                texts_input = Captions[idx]
                texts = self.glove_model.encode_docs(texts_input)
                self.save_imgs(epoch, texts)

                self.generator.save_weights(filepath='./saved_model/generator_weights_' + str(epoch) + '.h5')
                self.discriminator.save_weights(filepath='./saved_model/discriminator_weights_' + str(epoch) + '.h5')
        
        # save weights & history
        df_train = pd.DataFrame(history, columns=['epoch', 'batch', 'd_loss', 'acc', 'g_loss', 'time[sec]'])
        df_train.to_csv('./saved_model/history.csv')
        df_test = pd.DataFrame(history_test, columns=['epoch', 'd_loss', 'acc', 'g_loss', 'time[sec]'])
        df_test.to_csv('./saved_model/history_test.csv')
        self.generator.save_weights(filepath='./saved_model/generator_weights.h5')
        self.discriminator.save_weights(filepath='./saved_model/discriminator_weights.h5')
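Putting this together with the first example above, a typical training run might look like the following; the epoch count mirrors the epoch=5000 used there, and the other values are the method's defaults:

dcgan = DCGAN('./emoji/edited/emoji_64x64/',
              './emoji/description/detailed',
              './utils/glove.6B.300d.txt')
dcgan.train(epochs=5000, batch_size=26, save_interval=20)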
Example #5
                    str(Path(checkpoint_dir) / f'checkpoint_{global_step}'))


if __name__ == '__main__':
    from argparse import ArgumentParser
    parser = ArgumentParser()
    parser.add_argument('--data_dir',
                        type=str,
                        required=True,
                        help='Path to directory with CSV files.')
    parser.add_argument('--model_dir',
                        type=str,
                        required=True,
                        help='Path to directory with checkpoints.')
    parser.add_argument('--start_checkpoint',
                        type=str,
                        default=None,
                        help='Checkpoint to start training from.')
    args = parser.parse_args()
    train_data = load_dataset(Path(args.data_dir), subset='train')
    val_data = load_dataset(Path(args.data_dir), subset='dev')
    if torch.cuda.is_available():
        device = 'cuda'
        logger.debug('Using CUDA')
    else:
        device = 'cpu'

    encoder = IPAEncoder(args.data_dir)
    do_train(train_data, val_data, device, encoder.vocab, args.model_dir,
             args.start_checkpoint)
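The --start_checkpoint flag and the checkpoint_{global_step} path above imply a save/resume cycle roughly like the following sketch; the state-dict key names are assumptions, not necessarily the project's actual checkpoint format:

import torch
from pathlib import Path

def save_checkpoint(model, optimizer, global_step, checkpoint_dir):
    # mirrors the checkpoint_{global_step} naming used above
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'global_step': global_step},
               str(Path(checkpoint_dir) / f'checkpoint_{global_step}'))

def load_checkpoint(path, model, optimizer, device):
    state = torch.load(path, map_location=device)
    model.load_state_dict(state['model'])
    optimizer.load_state_dict(state['optimizer'])
    return state['global_step']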