Ejemplo n.º 1
0
def gridfonts(*args, **kwargs):
    """
    Build the "Gridfonts" dataset from the conx-data ``gridfonts.npy`` file.

    The file is fetched with ``get_file`` and loaded with NumPy (pickles
    allowed). Element 0 of the loaded object supplies the letter matrices,
    which are used for both the inputs and the targets; element 1 supplies
    the label strings. Positional and keyword arguments are accepted but
    not used.
    """
    result = cx.Dataset()
    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/gridfonts/gridfonts.npy"
    raw = np.load(get_file("gridfonts.npy", origin=source), allow_pickle=True)
    # raw is laid out as [letters, labels]
    glyphs = raw[0]
    inputs = np.array(list(glyphs))
    # The targets are a second, independent array built from the same glyphs.
    outputs = np.array(list(glyphs))
    tags = np.array(list(raw[1]), dtype=str)
    result.name = "Gridfonts"
    result.description = """
This dataset originates from Douglas Hofstadter's research
group:

http://goosie.cogsci.indiana.edu/pub/gridfonts.data

![Gridfont Grid](https://github.com/Calysto/conx-data/raw/master/gridfonts/grid.png)

These data have been processed to make them neural
network friendly:

https://github.com/Calysto/conx-data/blob/master/gridfonts/gridfonts.py

The dataset is composed of letters on a 25 row x 9 column
grid. The inputs and targets are identical, and the labels
contain a string identifying the letter.

You can read a thesis using part of this dataset here:
https://repository.brynmawr.edu/compsci_pubs/78/
"""
    result.load_direct([inputs], [outputs], [tags])
    return result
Ejemplo n.º 2
0
def cifar10(*args, **kwargs):
    """
    Build the "CIFAR-10" dataset from the Keras CIFAR-10 loader.

    The train and test splits returned by Keras are concatenated. Images
    are converted to float32 and divided by 255; targets are produced by
    ``to_categorical`` with 10 classes; labels are the class numbers as
    strings. Positional and keyword arguments are accepted but not used.
    """
    result = cx.Dataset()
    from keras.datasets import cifar10 as keras_cifar10
    (train_x, train_y), (test_x, test_y) = keras_cifar10.load_data()
    images = np.concatenate((train_x, test_x))
    # Drop the split arrays before the float conversion allocates a new copy.
    train_x = test_x = None
    images = images.astype('float32')
    images /= 255
    classes = np.concatenate((train_y, test_y))
    train_y = test_y = None
    one_hot = to_categorical(classes, 10)
    # Each entry of `classes` is indexed at [0] to get the class number.
    tags = np.array([str(entry[0]) for entry in classes], dtype=str)
    result.name = "CIFAR-10"
    result.description = """
Original source: https://www.cs.toronto.edu/~kriz/cifar.html

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10
classes, with 6000 images per class.

The classes are completely mutually exclusive. There is no overlap
between automobiles and trucks. "Automobile" includes sedans, SUVs,
things of that sort. "Truck" includes only big trucks. Neither
includes pickup trucks.
"""
    result.load_direct([images], [one_hot], [tags])
    return result
Ejemplo n.º 3
0
def figure_ground_a(*args, **kwargs):
    """
    Build the "Figure-Ground A" dataset from ``figure_ground_a.npy``.

    Each record of the loaded file is read as ``[letter, [brim, body]]``.
    The letters become the single input bank; the brims and the bodies
    become two target banks. No labels are loaded. Positional and keyword
    arguments are accepted but not used.
    """
    result = cx.Dataset()
    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/gridfonts/figure_ground_a.npy"
    records = np.load(get_file("figure_ground_a.npy", origin=source),
                      allow_pickle=True)
    # Record layout: [[[letter], [brim, body]], ...]
    whole_letters = []
    brim_parts = []
    body_parts = []
    for record in records:
        whole_letters.append(record[0])
        brim_parts.append(record[1][0])
        body_parts.append(record[1][1])
    letters = np.array(whole_letters)
    brims = np.array(brim_parts)
    bodies = np.array(body_parts)
    result.name = "Figure-Ground A"
    result.description = """
This dataset (the so-called a-tabase) originates from Douglas
Hofstadter's research group:

http://goosie.cogsci.indiana.edu/pub/gridfonts.data

![Gridfont Grid](https://github.com/Calysto/conx-data/raw/master/gridfonts/grid.png)

These data (all the letter A) have been processed to make them neural
network friendly:

https://github.com/Calysto/conx-data/blob/master/gridfonts/gridfonts.py

The brim and body parts have been idenified manually.  The dataset is
composed of letters on a 17 row x 9 column grid (4 lines not used on
top and another 4 not used on the bottom of each letter were removed
from the original 25x9 latter images). The inputs are composed of the
full letter. The targets are composed of a picture of the body and
the brim.

You can read a thesis using part of this dataset here:
https://repository.brynmawr.edu/compsci_pubs/78/
"""
    result.load_direct([letters], [brims, bodies])
    return result
Ejemplo n.º 4
0
def colors(
        *args,
        path='colors.csv',
        url="https://raw.githubusercontent.com/Calysto/conx-data/master/colors/colors.csv",
        **kwargs):
    """
    Build the "Colors" dataset of named RGB colors from a CSV file.

    Args:
        *args: accepted for interface compatibility; not used.
        path: file name handed to Keras ``get_file``.
        url: origin URL handed to Keras ``get_file``.
        **kwargs: accepted for interface compatibility; not used.

    Returns:
        A ``cx.Dataset`` whose inputs are ``[r, g, b]`` rows (each channel
        divided by 255, stored as float32), whose targets are one-element
        rows holding the 1-based running number of the color (uint16), and
        whose labels are the color names.

    Raises:
        ValueError: if a non-blank row does not have exactly four fields,
            or a channel value is not an integer literal.
    """
    dataset = cx.Dataset()
    from keras.utils import get_file
    path = get_file(path, origin=url)
    inputs = []
    labels = []
    targets = []
    count = 1
    # The context manager guarantees the file is closed even when a row
    # fails to parse; newline="" is what the csv module asks for so that
    # it can do its own newline handling.
    with open(path, "r", newline="") as fp:
        for line in csv.reader(fp):
            if not line:
                continue  # csv.reader yields [] for blank lines
            name, r, g, b = line
            if name == "name":
                continue  # header row
            inputs.append([int(r) / 255, int(g) / 255, int(b) / 255])
            targets.append([count])
            labels.append(name)
            count += 1
    inputs = np.array(inputs, dtype='float32')
    targets = np.array(targets, dtype='uint16')
    dataset.name = "Colors"
    dataset.description = """
Original source: https://github.com/andrewortman/colorbot

This dataset also includes some ignored in original data.

Inspired by:

* http://aiweirdness.com/tagged/paint-colors

When initially loaded, this database has the following format:

* labels: [color_name_string, ...] # order matches target
* inputs: [[red, green, blue], ...] # scaled between 0 and 1
* targets: [[int], ...] # number of label

For example:

```
>>> import conx as cx
>>> ds = cx.Dataset.get("colors")
>>> ds.labels[0], ds.inputs[0], ds.targets[0]
('tidewater',
 [0.7686274647712708, 0.843137264251709, 0.8352941274642944],
 [1])
```
"""
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
Ejemplo n.º 5
0
def cmu_faces_full_size(*args, path="cmu_faces_full_size.npz", **kwargs):
    """
    Build the "CMU Faces, full-size" dataset.

    The inputs and labels come from ``load_dataset_npz`` (given ``path`` and
    the conx-data URL), and the named dataset is then handed to
    ``process_face_data``, whose result is returned. Extra positional and
    keyword arguments are accepted but not used.
    """
    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/cmu_faces/cmu_faces_full_size.npz"
    faces = cx.Dataset()
    images, tags = load_dataset_npz(path, source)
    faces.name = "CMU Faces, full-size"
    faces.description = """
Original source: http://archive.ics.uci.edu/ml/datasets/cmu+face+images
"""
    return process_face_data(faces, images, tags)
Ejemplo n.º 6
0
def cifar100(*args, **kwargs):
    """
    Build the "CIFAR-100" dataset from the Keras CIFAR-100 loader.

    The train and test splits returned by Keras are concatenated. Targets
    are produced by ``to_categorical`` with 100 classes; labels are the
    class numbers as strings; images are converted to float32 and divided
    by 255. Positional and keyword arguments are accepted but not used.
    """
    result = cx.Dataset()
    from keras.datasets import cifar100 as keras_cifar100
    (train_x, train_y), (test_x, test_y) = keras_cifar100.load_data()
    images = np.concatenate((train_x, test_x))
    classes = np.concatenate((train_y, test_y))
    one_hot = to_categorical(classes, 100)
    # Each entry of `classes` is indexed at [0] to get the class number.
    tags = np.array([str(entry[0]) for entry in classes], dtype=str)
    images = images.astype('float32')
    images /= 255
    result.name = "CIFAR-100"
    result.description = """
Original source: https://www.cs.toronto.edu/~kriz/cifar.html

This dataset is just like the CIFAR-10, except it has 100 classes
containing 600 images each. The 100 classes in the CIFAR-100 are grouped
into 20 superclasses. Each image comes with a "fine" label (the class
to which it belongs) and a "coarse" label (the superclass to which it
belongs).  Here is the list of classes in the CIFAR-100:

Superclass                     | Classes
-------------------------------|-----------------------------------------------------
aquatic mammals	               | beaver, dolphin, otter, seal, whale
fish                           | aquarium fish, flatfish, ray, shark, trout
flowers	                       | orchids, poppies, roses, sunflowers, tulips
food containers                | bottles, bowls, cans, cups, plates
fruit and vegetables           | apples, mushrooms, oranges, pears, sweet peppers
household electrical devices   | clock, computer keyboard, lamp, telephone, television
household furniture            | bed, chair, couch, table, wardrobe
insects	                       | bee, beetle, butterfly, caterpillar, cockroach
large carnivores               | bear, leopard, lion, tiger, wolf
large man-made outdoor things  | bridge, castle, house, road, skyscraper
large natural outdoor scenes   | cloud, forest, mountain, plain, sea
large omnivores and herbivores | camel, cattle, chimpanzee, elephant, kangaroo
medium-sized mammals           | fox, porcupine, possum, raccoon, skunk
non-insect invertebrates       | crab, lobster, snail, spider, worm
people	                       | baby, boy, girl, man, woman
reptiles                       | crocodile, dinosaur, lizard, snake, turtle
small mammals                  | hamster, mouse, rabbit, shrew, squirrel
trees                          | maple, oak, palm, pine, willow
vehicles 1                     | bicycle, bus, motorcycle, pickup truck, train
vehicles 2                     | lawn-mower, rocket, streetcar, tank, tractor

"""
    result.load_direct([images], [one_hot], [tags])
    return result
Ejemplo n.º 7
0
def mnist_h5(*args, **kwargs):
    """
    Load the Keras MNIST dataset from an H5 file.

    The file is fetched with ``get_file`` and opened read-only. Its
    "inputs", "targets" and "labels" entries are attached to the dataset
    directly, and the open file handle is kept on the dataset as ``h5``.
    Positional and keyword arguments are accepted but not used.
    """
    import h5py

    local_file = get_file(
        "mnist.h5",
        origin="https://raw.githubusercontent.com/Calysto/conx-data/master/mnist/mnist.h5")
    store = h5py.File(local_file, "r")
    result = cx.Dataset()
    # Bind the three H5 entries to the matching private attributes.
    for key in ("inputs", "targets", "labels"):
        setattr(result, "_" + key, store[key])
    # The handle is stored on the dataset rather than closed here.
    result.h5 = store
    result.name = "MNIST-H5"
    result.description = description
    result._cache_values()
    return result
Ejemplo n.º 8
0
def fingers(*args, path='fingers.npz', **kwargs):
    """
    Build the "Fingers" dataset.

    Inputs and labels come from ``load_dataset_npz``. Inputs are converted
    to float32 and divided by 255. Each target is a length-6 vector with a
    1 at every position ``n`` in 0..5 where ``label == n`` and 0 elsewhere,
    stored as uint8. Extra positional and keyword arguments are accepted
    but not used.
    """
    result = cx.Dataset()
    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/fingers/fingers.npz"
    images, labels = load_dataset_npz(path, source)
    images = images.astype('float32')
    images /= 255
    one_hot_rows = [
        [int(label == category) for category in range(6)]
        for label in labels
    ]
    targets = np.array(one_hot_rows).astype('uint8')
    result.name = "Fingers"
    result.description = """
This dataset contains 12,000 RGB images of human hands showing different
numbers of fingers, from zero to five.  The same fingers are always used
to represent each number category (e.g., all images of "two" have raised
index and middle fingers).  Each image is a 30 x 40 x 3 array of
floating-point numbers in the range 0 to 1.  The target data consists of
one-hot binary vectors of size 6 corresponding to the classification
categories "zero" through "five".  There are 2000 images for each category.

Created by Shreeda Segan and Albert Yu at Sarah Lawrence College.
"""
    result.load_direct([images], [targets], [labels])
    return result
Ejemplo n.º 9
0
def mnist(*args, **kwargs):
    """
    Build a conx dataset from the Keras MNIST loader.

    The Keras train and test splits are each reshaped to
    (count, 28, 28, 1), converted to float16, concatenated, and divided by
    255. Targets are the ``to_categorical`` encoding of the integer labels
    stored as uint8; labels are the integer labels as strings.

    Args:
        *args: accepted for interface compatibility; not used.
        **kwargs: accepted for interface compatibility; not used.

    Returns:
        A ``cx.Dataset`` loaded with one input bank, one target bank and
        one label bank.
    """
    from keras.datasets import mnist

    # input image dimensions
    img_rows, img_cols = 28, 28
    # the data, shuffled and split between train and test sets
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    x_train = x_train.astype('float16')
    x_test = x_test.astype('float16')
    inputs = np.concatenate((x_train, x_test)) / 255
    labels = np.concatenate((y_train, y_test))  # ints, 0 to 9
    ###########################################
    # fix mis-labeled image(s) in Keras dataset
    labels[10994] = 9
    ###########################################
    targets = to_categorical(labels).astype("uint8")
    labels = np.array([str(label) for label in labels], dtype=str)
    dataset = cx.Dataset()
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
Ejemplo n.º 10
0
             shape=512,
             vshape=(16, 32),
             activation='relu',
             dropout=0.2))
# Second hidden layer: 512 units, ReLU activation, dropout 0.2.
# NOTE(review): vshape=(16, 32) is presumably the display shape for the
# 512 units (16 * 32 == 512) -- confirm against the conx Layer docs.
net.add(
    cx.Layer("hidden2",
             shape=512,
             vshape=(16, 32),
             activation='relu',
             dropout=0.2))
# Output layer: 10 units with softmax activation.
net.add(cx.Layer("output", shape=10, activation='softmax'))

# Wire the layers in sequence: input -> hidden1 -> hidden2 -> output.
net.connect('input', 'hidden1')
net.connect('hidden1', 'hidden2')
net.connect('hidden2', 'output')

net.compile(loss='mean_squared_error', optimizer='sgd')

ds = cx.Dataset()
# NOTE(review): the return value of get() is discarded. This only works if
# get() populates `ds` in place; if it returns a new dataset instead, `ds`
# stays empty -- confirm against the installed conx version.
ds.get("mnist")
net.set_dataset(ds)
#net.rescale_inputs((0,255), (0,1), 'float32')
#net.shuffle_dataset()
# NOTE(review): presumably flattens each input to a length-784 vector in
# place (the return value is not used) -- confirm.
ds.inputs.reshape(784)
# NOTE(review): presumably restricts the dataset to 100 patterns -- confirm.
ds.slice(100)
#net.set_targets_to_categories(10)
ds.summary()

# NOTE(review): the argument is presumably the number of epochs -- confirm.
net.train(10)
#net.test()