# imports required by this snippet (assumed from context)
import os

import numpy as np
import handshape_datasets as hd


def load_ciarp(dataset_name):
    """
    Load the ciarp dataset.

    Returns (x, y): the dataset images and labels as numpy arrays.

    """
    dataset_path = '/develop/data/{}/data'.format(dataset_name)

    if not os.path.exists(dataset_path):
        os.makedirs(dataset_path)
    data = hd.load(dataset_name, dataset_path)

    # TODO: define best way to do this

    x_train, y_train = data['train_Kinect_WithoutGabor']
    x_test, y_test = data['test_Kinect_WithoutGabor']

    x = np.concatenate((x_train, x_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)

    return x, y
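A minimal usage sketch for the loader above; 'ciarp' as the dataset name and the /develop/data cache path are assumptions carried over from the snippet:

x, y = load_ciarp('ciarp')
print(x.shape, y.shape)  # images (N, H, W, C) and labels (N,)
print(np.unique(y))      # class ids present in the combined train+test data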
Example #2
def load_lsa16(dataset_name):
    """
    Load the lsa16 dataset.

    Returns (x, y): the dataset images and labels as numpy arrays.

    """
    DATASET_PATH = '/develop/data/{}/data'.format(dataset_name)

    if not os.path.exists(DATASET_PATH):
        os.makedirs(DATASET_PATH)
    data = hd.load(dataset_name, DATASET_PATH)

    # TODO: define best way to do this

    # NOTE: these subset keys mirror the ciarp loader above; lsa16 may expose
    # different subset names.
    x_train, y_train = data['train_Kinect_WithoutGabor']
    x_test, y_test = data['test_Kinect_WithoutGabor']

    X = np.concatenate((x_train, x_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)

    return X, y
Example #3
def load_rwth(data_dir, config, splits):
    """
    Load rwth dataset.

    Args:
        data_dir (str): path of the directory with 'splits', 'data' subdirs.
        config (dict): general dict with program settings.
        splits (list): list of strings 'train'|'val'|'test'

    Returns (dict): dictionary with keys as splits and values as DataLoader objects.

    """

    DATASET_NAME = "rwth"
    DATASET_PATH = "/develop/data/rwth/data"

    data = hd.load(DATASET_NAME, DATASET_PATH)

    features = data[0]
    classes = data[1]['y']

    good_min = 20
    good_classes = []

    for i in np.unique(classes):
        images = features[np.equal(i, classes)]
        if len(images) >= good_min:
            good_classes.append(i)

    good_x = features[np.in1d(classes, good_classes)]
    good_y = classes[np.in1d(classes, good_classes)]
    my_dict = dict(zip(np.unique(good_y), range(len(np.unique(good_y)))))
    good_y = np.vectorize(my_dict.get)(good_y)

    features, classes = good_x, good_y

    uniqueClasses = np.unique(classes)

    x_train, x_test, y_train, y_test = train_test_split_balanced(
        features,
        classes,
        train_size=config['data.train_size'],
        test_size=config['data.test_size'])
    x_train, x_test = x_train / 255.0, x_test / 255.0

    _, amountPerTrain = np.unique(y_train, return_counts=True)
    _, amountPerTest = np.unique(y_test, return_counts=True)

    train_datagen_args = dict(
        featurewise_center=True,
        featurewise_std_normalization=True,
        rotation_range=config['data.rotation_range'],
        width_shift_range=config['data.width_shift_range'],
        height_shift_range=config['data.height_shift_range'],
        horizontal_flip=config['data.horizontal_flip'],
        fill_mode='constant',
        cval=0)
    train_datagen = ImageDataGenerator(**train_datagen_args)
    train_datagen.fit(x_train)

    test_datagen_args = dict(featurewise_center=True,
                             featurewise_std_normalization=True,
                             fill_mode='constant',
                             cval=0)
    test_datagen = ImageDataGenerator(**test_datagen_args)
    test_datagen.fit(x_train)

    w, h, c = list(map(int, config['model.x_dim'].split(',')))

    ret = {}
    for split in splits:
        # n_way (number of classes per episode)
        if split in ['val', 'test']:
            n_way = config['data.test_way']
        else:
            n_way = config['data.train_way']

        # n_support (number of support examples per class)
        if split in ['val', 'test']:
            n_support = config['data.test_support']
        else:
            n_support = config['data.train_support']

        # n_query (number of query examples per class)
        if split in ['val', 'test']:
            n_query = config['data.test_query']
        else:
            n_query = config['data.train_query']

        # the augmented generator belongs to the train split; the plain
        # (standardize-only) generator belongs to val/test
        if split in ['val', 'test']:
            y = y_test
            x = x_test
            dg = test_datagen
        else:
            y = y_train
            x = x_train
            dg = train_datagen

        amountPerClass = amountPerTest if split in ['val', 'test'] else amountPerTrain

        i = np.argsort(y)
        x = x[i, :, :, :]

        # standardize each image (random augmentation is a no-op for the
        # test-time generator, which has no augmentation parameters set)
        for index in range(len(x)):
            x[index] = dg.standardize(dg.random_transform(x[index]))

        data = np.reshape(x,
                          (len(uniqueClasses), amountPerClass[0], 132, 92, 3))

        data_loader = DataLoader(data,
                                 n_classes=len(uniqueClasses),
                                 n_way=n_way,
                                 n_support=n_support,
                                 n_query=n_query,
                                 x_dim=(w, h, c))

        ret[split] = data_loader

    return ret
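The loader above is driven entirely by the config dict. A hypothetical config showing the keys it reads (the values are illustrative assumptions, not taken from the source):

config = {
    'data.train_size': 0.8, 'data.test_size': 0.2,
    'data.rotation_range': 10, 'data.width_shift_range': 0.1,
    'data.height_shift_range': 0.1, 'data.horizontal_flip': True,
    'data.train_way': 5, 'data.test_way': 5,
    'data.train_support': 5, 'data.test_support': 5,
    'data.train_query': 5, 'data.test_query': 5,
    'model.x_dim': '132,92,3',
}
loaders = load_rwth('/develop/data/rwth', config, ['train', 'test'])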
Example #4
    def train(self, dataset_id, epochs, batch_size=128, save_interval=50):

        # Load the dataset
        #(X_train, _), (_, _) = mnist.load_data()
        x, metadata = hd.load(dataset_id)
        X_train, X_test, Y_train, Y_test = self.split(
            parameters.get_split_value(dataset_id), x, metadata['y'])
        # Rescale -1 to 1
        X_train = (X_train.astype(np.float32) - 127.5) / 127.5
        #X_train = np.expand_dims(X_train, axis=3)

        #half_batch = int(batch_size / 2)

        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of images
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs, labels = X_train[idx], Y_train[idx]

            noise = np.random.normal(0, 1, (batch_size, self.noise_value))

            # Generate a batch of new images
            gen_imgs = self.generator.predict([noise, labels])

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch([imgs, labels],
                                                            valid)
            d_loss_fake = self.discriminator.train_on_batch([gen_imgs, labels],
                                                            fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------
            sampled_labels = np.random.randint(0, self.classes,
                                               batch_size).reshape(-1, 1)

            # The generator wants the discriminator to label the generated samples
            # as valid (ones)
            #valid_y = np.array([1] * batch_size)

            # Train the generator
            g_loss = self.combined.train_on_batch([noise, sampled_labels],
                                                  valid)

            # Plot the progress
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                  (epoch, d_loss[0], 100 * d_loss[1], g_loss))

            # If at save interval => save generated image samples
            if epoch % save_interval == 0:
                self.save_imgs(epoch)
        save_path = os.path.join(default_folder, self.name)
        os.makedirs(save_path, exist_ok=True)  # ensure the save directory exists
        self.base_model.save(
            os.path.join(save_path,
                         f"{self.name}_GANdiscriminator{epochs}.h5"))
        self.base_model.save_weights(
            os.path.join(save_path,
                         f"{self.name}_GANdiscriminator{epochs}_weights.h5"))
        print("Saved model to disk")
Example #5
            asl_class += 1
        if (index == 62):
            asl_class += 2
        if (index == 86):
            asl_class += 1
        if (index == -1):
            worksheet.write(0, col, f"{index}")
            index += 1
        else:
            worksheet.write(0, col, f"{index}({asl_class})")
            worksheet.set_column(col, col, 32)  # set_column takes (first_col, last_col, width)
            index += 1
            asl_class += 1

        for i, dataset_id in enumerate(hd.ids()):
            x, metadata = hd.load(dataset_id)
            worksheet.write(i + 1, 0, dataset_id)
            worksheet.set_row(i + 1, 32)
            flag = np.zeros(metadata['y'].max() + 1)
            if (x.shape[3] == 1):
                x = np.repeat(x, 3, -1)
            for h in range(len(x)):
                clas = metadata['y'][h]

                if (flag[clas] == 0):
                    path_to_save = cache_path / f"{dataset_id}image{h}.png"

                if (dataset_id == "PugeaultASL_B"):
                    img_depth = np.zeros((x[h].shape[0], x[h].shape[1]),
                                         dtype='f8')
                    max_z = x[h].max()
Example #6
    def __init__(self, dataset_id, **kwargs):
        # Forward the optional 'version'/'delete' arguments only when they
        # were given; fall back to a plain load if the dataset rejects them.
        load_kwargs = {k: kwargs[k] for k in ('version', 'delete') if k in kwargs}
        try:
            self.dataset = hd.load(dataset_id, **load_kwargs)
        except Exception:
            self.dataset = hd.load(dataset_id)

        self.input_shape = self.dataset[0][0].shape

        self.img_rows = (self.input_shape[0] // 4) * 4
        self.img_cols = (self.input_shape[1] // 4) * 4

        self.channels = 3
        self.name = dataset_id
        if (self.name == "psl" or self.name == "indianB"):
            self.img_shape = (128, 128, self.channels)
            self.img_rows = 128
            self.img_cols = 128
        else:
            if (self.name == "indianA"):
                self.img_shape = (64, 64, self.channels)
                self.img_rows = 64
                self.img_cols = 64
            else:
                self.img_shape = (self.img_rows, self.img_cols, self.channels)
        self.classes = self.dataset[1]['y'].max() + 1
        self.noise_value = 100

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator
        self.base_model, self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # Build and compile the generator
        self.generator = self.build_generator()
        noise = keras.layers.Input(shape=(self.noise_value, ))
        label = keras.layers.Input(shape=(1, ))
        img = self.generator([noise, label])

        #self.generator.compile(loss='binary_crossentropy', optimizer=optimizer)

        # The generator takes noise as input and generated imgs
        #z = keras.layers.Input(shape=(100,))
        #img = self.generator(z)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # 'valid' is the discriminator's validity score for the generated images
        valid = self.discriminator([img, label])

        # The combined model (stacked generator and discriminator) takes
        # noise as input => generates images => determines validity
        self.combined = Model([noise, label], valid)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)
        self.path = default_folder
        if not os.path.exists(self.path):
            os.makedirs(self.path)
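A usage sketch for the constructor above (CGAN again stands in for the unseen class name); both keyword arguments are optional and the load falls back gracefully without them:

gan = CGAN('lsa16', version='color', delete=False)  # hypothetical class name
print(gan.img_shape, gan.classes)  # input shape and number of classes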
Example #7
def load_rwth(config, path=None):
    """
    Load rwth dataset.

    Returns (x, y): as dataset x and y.

    """

    train_size = config['data.train_size']
    test_size = config['data.test_size']
    n_train_per_class = config['data.n_train_per_class']
    n_test_per_class = config['data.n_test_per_class']

    if path is None:
        path = '/tf/data/{}'.format(config['data.dataset'])
    data_dir = os.path.join(path, 'data')

    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    data = hd.load(config['data.dataset'], Path(data_dir))

    if config['data.split']:
        split_dir = os.path.join(path, 'splits', config['data.split'])

        def split_file(split):
            return os.path.join(split_dir, f"{split}.txt")

        x_train, y_train = load_from_split(config['data.dataset'],
                                           config['data.version'], data_dir,
                                           split_file('train'))
        x_test, y_test = load_from_split(config['data.dataset'],
                                         config['data.version'], data_dir,
                                         split_file('test'))
        x_val, y_val = load_from_split(config['data.dataset'],
                                       config['data.version'], data_dir,
                                       split_file('val'))
    else:
        good_min = 40
        good_classes = []
        n_unique = len(np.unique(data[1]['y']))
        for i in range(n_unique):
            images = data[0][np.equal(i, data[1]['y'])]
            if len(images) >= good_min:
                good_classes = good_classes + [i]

        x = data[0][np.in1d(data[1]['y'], good_classes)]
        y = data[1]['y'][np.in1d(data[1]['y'], good_classes)]
        y_dict = dict(zip(np.unique(y), range(len(np.unique(y)))))
        y = np.vectorize(y_dict.get)(y)

        split = train_test_split if n_train_per_class <= 0 else train_test_split_balanced

        if n_train_per_class <= 0:
            x_train, x_test, y_train, y_test = split(x,
                                                     y,
                                                     train_size=train_size,
                                                     test_size=test_size,
                                                     stratify=y)
            x_train, x_val, y_train, y_val = split(x_train,
                                                   y_train,
                                                   train_size=0.8,
                                                   test_size=0.2,
                                                   stratify=y_train)
        else:
            n_train_per_class = int(np.round(n_train_per_class * 1.6))
            x_train, x_test, y_train, y_test = split(
                np.array(x),
                np.array(y),
                train_size=train_size,
                test_size=test_size,
                n_train_per_class=n_train_per_class,
                n_test_per_class=n_test_per_class)
            x_train, x_val, y_train, y_val = split(
                x_train,
                y_train,
                train_size=0.8,
                n_train_per_class=n_train_per_class,
                test_size=0.2)

    return (x_train, y_train), (x_val, y_val), (x_test, y_test)
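This variant reads different config keys from the episodic loader in Example #3. A hypothetical config exercising the no-split branch (values are illustrative):

config = {
    'data.dataset': 'rwth',
    'data.version': None,
    'data.split': '',  # falsy, so the balanced-split branch runs
    'data.train_size': 0.8, 'data.test_size': 0.2,
    'data.n_train_per_class': 0, 'data.n_test_per_class': 0,
}
(x_train, y_train), (x_val, y_val), (x_test, y_test) = load_rwth(config)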
Example #8
import handshape_datasets
import sklearn
import keras
from keras.models import Model

dataset_id = "lsa16"  #example for the lsa16 dataset
ver = "color"  #version is optional argument, some datasets has one version
supr = False  #supr is optional, some datasets can delete temporary files if it have .npz file

epochs = 15
batch_size = 64

dataset = handshape_datasets.load(dataset_id, version=ver,
                                  delete=supr)  #load the dataset
input_shape = dataset[0][0].shape  # obtain the input shape
classes = dataset[1]['y'].max() + 1  # obtain the number of classes
"""build the model"""
base_model = keras.applications.mobilenet.MobileNet(
    input_shape=(input_shape[0], input_shape[1], 3),
    weights='imagenet',
    include_top=False)
output = keras.layers.GlobalAveragePooling2D()(base_model.output)
output = keras.layers.Dense(32, activation='relu')(output)
output = keras.layers.Dense(classes, activation='softmax')(output)
model = Model(inputs=base_model.input, outputs=output)
model.compile(optimizer='Adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
"""split the dataset (its optional)"""
test_size = 0.1
X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
    dataset[0], dataset[1]['y'], test_size=test_size)
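A hedged continuation of the example: with the split done, training uses the standard Keras fit call (validating on the held-out split is an assumption, not part of the original snippet):

model.fit(X_train, Y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(X_test, Y_test))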
Example #9
import handshape_datasets as hd
import logging
import numpy as np

#hd.list_datasets()
x, metadata = hd.load("rwth")
print(x.shape)

for k in metadata:
    print(k, metadata[k].shape, metadata[k].min(), metadata[k].max())

#hd.clear("Nus1")
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
#logging.debug(f"This message should go to the log file")
#logging.info("So should this")
#logging.warning("And this, too")
Example #10
# Basic example showing how to get a dataset
import handshape_datasets as hd

DATASET_NAME = "PugeaultASL_A"
#version=dict({'1':'WithGabor'})

#ciarp_info = hd.info(DATASET_NAME)
#x,metadata = hd.load(DATASET_NAME, version='WithGabor')  # ciarp
#x,metadata = hd.load(DATASET_NAME, version='bw')  # nus1
#x,metadata = hd.load(DATASET_NAME, version='hn')
x, metadata = hd.load(DATASET_NAME)
print(x.shape)
print(x[1])
print(x.max())
print(x.min())
for k in metadata:
    print(k, metadata[k].shape, metadata[k].min(), metadata[k].max())

#print(ciarp_info.summary())

#ciarp[0].show_dataset()  # never returns a dataset; in the case of lsa16 it returns an np.array and a dict (x and metadata)

# ciarp.show_dataset(subsets=["test_Kinect_WithGabor"],samples=128)

# ciarp.show_dataset(subsets=["test_Kinect_WithGabor"],samples=[1,2,3,0,15,1,200])
def load_ciarp(data_dir, config, splits):
    """
    Load ciarp dataset.

    Args:
        data_dir (str): path of the directory with 'splits', 'data' subdirs.
        config (dict): general dict with program settings.
        splits (list): list of strings 'train'|'val'|'test'

    Returns (dict): dictionary with keys as splits and values as DataLoader objects.

    """

    DATASET_NAME = "ciarp"
    DATASET_PATH = "/develop/data/ciarp/data"

    data = hd.load(DATASET_NAME, DATASET_PATH)

    x_train, y_train = data['train_Kinect_WithoutGabor']
    x_test, y_test = data['test_Kinect_WithoutGabor']

    X = np.concatenate((x_train, x_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)

    uniqueClasses = np.unique(y)

    x_train, x_test, y_train, y_test = train_test_split_balanced(X,
                                                                 y,
                                                                 train_size=config['data.train_size'],
                                                                 test_size=config['data.test_size'])
    x_train, x_test = x_train / 255.0, x_test / 255.0

    _, amountPerTrain = np.unique(y_train, return_counts=True)
    _, amountPerTest = np.unique(y_test, return_counts=True)

    train_datagen_args = dict(featurewise_center=True,
                              featurewise_std_normalization=True,
                              rotation_range=config['data.rotation_range'],
                              width_shift_range=config['data.width_shift_range'],
                              height_shift_range=config['data.height_shift_range'],
                              horizontal_flip=config['data.horizontal_flip'],
                              fill_mode='constant',
                              cval=0)
    train_datagen = ImageDataGenerator(**train_datagen_args)
    train_datagen.fit(x_train)

    test_datagen_args = dict(featurewise_center=True,
                             featurewise_std_normalization=True,
                             fill_mode='constant',
                             cval=0)
    test_datagen = ImageDataGenerator(**test_datagen_args)
    test_datagen.fit(x_train)

    w, h, c = list(map(int, config['model.x_dim'].split(',')))

    ret = {}
    for split in splits:
        # n_way (number of classes per episode)
        if split in ['val', 'test']:
            n_way = config['data.test_way']
        else:
            n_way = config['data.train_way']

        # n_support (number of support examples per class)
        if split in ['val', 'test']:
            n_support = config['data.test_support']
        else:
            n_support = config['data.train_support']

        # n_query (number of query examples per class)
        if split in ['val', 'test']:
            n_query = config['data.test_query']
        else:
            n_query = config['data.train_query']

        # the augmented generator belongs to the train split; the plain
        # (standardize-only) generator belongs to val/test
        if split in ['val', 'test']:
            y = y_test
            x = x_test
            dg = test_datagen
        else:
            y = y_train
            x = x_train
            dg = train_datagen

        amountPerClass = amountPerTest if split in ['val', 'test'] else amountPerTrain

        i = np.argsort(y)
        x = x[i, :, :, :]
        
        if config['model.type'] in ['processed']:
            # standardize each image (random augmentation is a no-op for the
            # test-time generator, which has no augmentation parameters set)
            for index in range(len(x)):
                x[index] = dg.standardize(dg.random_transform(x[index]))

        data = np.reshape(x, (len(uniqueClasses), amountPerClass[0], w, h, c))

        data_loader = DataLoader(data,
                                 n_classes=len(uniqueClasses),
                                 n_way=n_way,
                                 n_support=n_support,
                                 n_query=n_query,
                                 x_dim=(w, h, c))

        ret[split] = data_loader

    return ret