Example 1
def read_data_sets(train_dir,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000,
                   seed=None,
                   source_url=None): # omit url since we are using our own dataset
    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 'test-images-idx3-ubyte.gz'
    TEST_LABELS = 'test-labels-idx1-ubyte.gz'

    local_file = base.maybe_download(TRAIN_IMAGES, train_dir, None) # omit url, local file will be a path

    with gfile.Open(local_file, 'rb') as f:
        train_images = mnist_module.extract_images(f)

    print(train_images.shape)

    local_file = base.maybe_download(TRAIN_LABELS, train_dir, None) # omit url
    with gfile.Open(local_file, 'rb') as f:
        train_labels = mnist_module.extract_labels(f, one_hot=one_hot)

    local_file = base.maybe_download(TEST_IMAGES, train_dir, None) # omit url
    with gfile.Open(local_file, 'rb') as f:
        test_images = mnist_module.extract_images(f)

    local_file = base.maybe_download(TEST_LABELS, train_dir, None) # omit url
    with gfile.Open(local_file, 'rb') as f:
        test_labels = mnist_module.extract_labels(f, one_hot=one_hot)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'
            .format(len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    options = dict(dtype=dtype, reshape=reshape, seed=seed)

    train = mnist_module.DataSet(train_images, train_labels, **options)
    validation = mnist_module.DataSet(validation_images, validation_labels, **options)
    test = mnist_module.DataSet(test_images, test_labels, **options)

    return base.Datasets(train=train, validation=validation, test=test)
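A usage sketch for this reader, assuming the four gzip files already sit in a local './data' directory (with source_url omitted, base.maybe_download resolves local paths and downloads nothing) and the TF 1.x contrib imports this snippet relies on:

from tensorflow.python.framework import dtypes
from tensorflow.python.platform import gfile
from tensorflow.contrib.learn.python.learn.datasets import base
from tensorflow.contrib.learn.python.learn.datasets import mnist as mnist_module

data = read_data_sets('./data', one_hot=True, validation_size=5000)
batch_images, batch_labels = data.train.next_batch(100)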
Example 2
def apply_gaussian_to_dataset(dataset):
    # Assemble a fresh Datasets namedtuple instance for the blurred splits
    # rather than assigning attributes onto the namedtuple class.
    Datasets = collections.namedtuple('Datasets',
                                      ['train', 'validation', 'test'])
    train_images = apply_gaussian_filter(dataset.train.images)
    test_images = apply_gaussian_filter(dataset.test.images)
    validation_images = apply_gaussian_filter(dataset.validation.images)

    # Note: DataSet's default dtype=float32 rescales inputs by 1/255.
    train = mnist.DataSet(train_images, dataset.train.labels, reshape=False)
    test = mnist.DataSet(test_images, dataset.test.labels, reshape=False)
    validation = mnist.DataSet(validation_images,
                               dataset.validation.labels,
                               reshape=False)

    return Datasets(train=train, validation=validation, test=test)
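apply_gaussian_filter is not defined in this snippet; a minimal sketch of what it might look like, using scipy.ndimage (the sigma value is an arbitrary illustration):

import numpy as np
from scipy.ndimage import gaussian_filter

def apply_gaussian_filter(images, sigma=1.0):
    # Blur each image independently; handles flat (n, 784) batches as
    # well as (n, 28, 28, 1) batches.
    blurred = np.empty_like(images)
    for k in range(images.shape[0]):
        blurred[k] = gaussian_filter(images[k].reshape(28, 28),
                                     sigma=sigma).reshape(images[k].shape)
    return blurred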
Example 3
def convert_to_data_sets(data_gzs,
                         one_hot=False,
                         dtype=dtypes.float32,
                         reshape=True,
                         validation_size=5000,
                         seed=None):
    """ Modified version of tensorflow/tensorflow/contrib/learn/python/learn/datasets/mnist.py """

    with gfile.Open(data_gzs['train-images'][0], 'rb') as f:
        train_images = tf_mnist.extract_images(f)

    with gfile.Open(data_gzs['train-labels'][0], 'rb') as f:
        train_labels = tf_mnist.extract_labels(f, one_hot=one_hot)

    with gfile.Open(data_gzs['t10k-images'][0], 'rb') as f:
        test_images = tf_mnist.extract_images(f)

    with gfile.Open(data_gzs['t10k-labels'][0], 'rb') as f:
        test_labels = tf_mnist.extract_labels(f, one_hot=one_hot)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    options = dict(dtype=dtype, reshape=reshape, seed=seed)

    train = tf_mnist.DataSet(train_images, train_labels, **options)
    validation = tf_mnist.DataSet(validation_images, validation_labels,
                                  **options)
    test = tf_mnist.DataSet(test_images, test_labels, **options)

    return base.Datasets(train=train, validation=validation, test=test)
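A hypothetical call, assuming data_gzs maps each key to a list whose first element is the path to the corresponding gzip file:

data_gzs = {
    'train-images': ['data/train-images-idx3-ubyte.gz'],
    'train-labels': ['data/train-labels-idx1-ubyte.gz'],
    't10k-images': ['data/t10k-images-idx3-ubyte.gz'],
    't10k-labels': ['data/t10k-labels-idx1-ubyte.gz'],
}
datasets = convert_to_data_sets(data_gzs, one_hot=True)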
Example 4
def extract_n_data_sets(datasets, labels=(1, 2, 3)):
    # Collect the examples whose one-hot label is in `labels`.
    # Accumulate in Python lists (repeated np.append would be quadratic);
    # the tuple default avoids the mutable-default-argument pitfall.
    extracted_images = []
    extracted_labels = []
    for i in range(datasets.num_examples):
        if np.argmax(datasets.labels[i]) in labels:
            extracted_images.append(datasets.images[i])
            extracted_labels.append(datasets.labels[i])
    cnt = len(extracted_images)
    extracted_images = np.asarray(extracted_images, dtype=np.float32)
    extracted_labels = np.asarray(extracted_labels)
    return mnist.DataSet(extracted_images.reshape(cnt, 784),
                         extracted_labels.reshape(cnt, 10),
                         dtype=dtypes.uint8,
                         reshape=False)
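For example, to keep only the digits 1, 2 and 3 from a one-hot test split (mnist_data being any Datasets tuple produced by the readers above):

ones_to_threes = extract_n_data_sets(mnist_data.test, labels=(1, 2, 3))
print(ones_to_threes.num_examples)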
Example 5
def ConvertImg(imgFolder):
    RawImgSize = (512, 512)
    if not os.path.isdir(imgFolder):
        logging.warning("Raw image folder %s doesn't exist", imgFolder)
    train_directory = os.path.join(imgFolder)
    all_entries = os.listdir(train_directory)
    dirnames = []
    for entry in all_entries:
        if os.path.isdir(os.path.join(train_directory, entry)):
            dirnames.append(entry)

    arr = []
    label = []
    for dirname in dirnames:
        files = os.listdir(os.path.join(train_directory, dirname))

        for file in files:
            # read file as gray image
            img = Image.open(os.path.join(train_directory, dirname,
                                          file)).convert('L')
            if img.size[0] != RawImgSize[0] or img.size[1] != RawImgSize[1]:
                print('Error: image size !=', RawImgSize)
            else:
                # The label comes from the folder name: append one label
                # (the folder name) per accepted image.
                label.append(dirname)
                for i in range(RawImgSize[0]):
                    for j in range(RawImgSize[1]):
                        pixel = float(img.getpixel((j, i)))
                        arr.append(pixel)

    # 'arr' is a flat 1-D vector. Reshape it to a
    # (num_files, image_rows, image_cols, 1) numpy array, then wrap it with
    # the labels in mnist's default 'DataSet' class and return the
    # MNIST-like dataset.

    train_labels = np.array(label)
    train_images = np.array(arr).reshape(
        (len(label), RawImgSize[0], RawImgSize[1], 1))
    dtype = dtypes.float32
    reshape = True
    seed = None
    options = dict(dtype=dtype, reshape=reshape, seed=seed)
    mnData = mnist.DataSet(train_images, train_labels, **options)
    return mnData
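The inner two getpixel loops are correct but slow on 512x512 images; an equivalent and much faster way to collect the same row-major pixel values from each accepted image is:

pixels = np.asarray(img, dtype=np.float32).reshape(-1)
arr.extend(pixels)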
Example 6
def select_data(n=10, expand_with_deform=0, train_dir='MNIST-data'):
  """Extracts a subset of mnist train data.
  If doublt_with_deform is True, dataset size is doubled adding a deformed duplicate.
  n is number of examples for each digit/class.

  return normal_dataset, expanded_dataset
  """
  # The 2 datasets to be constructed.
  normal = None
  expanded = None

  train_images, train_labels = load_mnist_data(train_dir)

  numbers = [[] for i in range(10)] # 10 classes, 10 arrays.

  # Take n datapoints for each number.
  for i in range(len(train_labels)):
    if sum(len(x) for x in numbers) == 10 * n:
      break

    number = np.where(train_labels[i] == 1)[0][0]
    if len(numbers[number]) < n:
      numbers[number].append(i)

  # Scramble subset. 'numbers' contain indices into train_labels.
  numbers = np.asarray(numbers)
  numbers = numbers.reshape(10 * n)
  np.random.shuffle(numbers)

  # Actually retrieve the subset.
  subset_images = []
  subset_labels = []
  for i in numbers:
    subset_images.append(train_images[i])
    subset_labels.append(train_labels[i])

  options = dict(dtype=dtypes.float32, reshape=True, seed=None)
  # Construct normal dataset
  normal = mnist.DataSet(np.asarray(subset_images), np.asarray(subset_labels), **options)

  for j in range(expand_with_deform):
    print("Deforming all 'train' images..")
    count = 0
    for i in numbers:
      shape = train_images[i].shape
      image = train_images[i].reshape((28, 28))
      new_image = ed.rotate(image)
      new_image = ed.translate(new_image)
      new_image = ed.deform(new_image)
      subset_images.append(new_image.reshape(shape))
      subset_labels.append(train_labels[i])
      count += 1
      print('Processed image {}'.format(count), end='\r')
    print("\nDeformation done.")

  subset_images = np.asarray(subset_images)
  subset_labels = np.asarray(subset_labels)

  # Shuffle expanded set.
  perm = np.arange(len(subset_images))
  np.random.shuffle(perm)
  subset_images = subset_images[perm]
  subset_labels = subset_labels[perm]

  expanded = mnist.DataSet(subset_images, subset_labels, **options)
  return normal, expanded
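A usage sketch, assuming 'MNIST-data' already holds the MNIST archives, load_mnist_data returns one-hot labels (the np.where lookup above relies on that), and ed is this project's deformation helper module providing rotate, translate and deform:

normal, expanded = select_data(n=100, expand_with_deform=1)
print(normal.num_examples, expanded.num_examples)  # 1000 and 2000 here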
Example 7
def read_semeion(fname='semeion/semeion.data'):
    with open(fname, 'r') as f:
        lines = f.readlines()

    width = 16
    height = 16
    size = width * height
    classes = 10

    images = []
    labels = []
    fnumber = 0

    for line in lines:
        data = line.split(' ')
        image = []
        label = []

        for i in range(0, size):
            image.append(int(float(data[i])))
        images.append(image)

        for i in range(size, size + classes):
            label.append(int(float(data[i])))
        labels.append(label)

        fnumber += 1

    for i in range(len(images)):
        ii = scale(numpy.reshape(images[i], (width, height)), 28, 28)
        ii = numpy.reshape(ii, (28, 28, 1))
        images[i] = ii

    width = 28
    height = 28

    # Shuffle data
    images_shuffle = []
    labels_shuffle = []
    indexes = list(range(len(images)))
    random.shuffle(indexes)
    for i in indexes:
        images_shuffle.append(images[i])
        labels_shuffle.append(labels[i])

    images = images_shuffle
    labels = labels_shuffle

    for i in range(len(labels)):
        labels[i] = numpy.reshape(labels[i], (10, ))

    samples = len(lines)
    train_samples = 1400
    val_samples = 120
    test_samples = 73

    # Train set
    image_train = numpy.array(images[:train_samples], dtype=numpy.float32)
    image_train = image_train.reshape(train_samples, width, height, 1)

    label_train = numpy.array(labels[:train_samples], dtype=numpy.float32)

    # Validation Set
    image_val = numpy.array(images[train_samples:train_samples + val_samples],
                            dtype=numpy.float32)
    image_val = image_val.reshape(val_samples, width, height, 1)

    label_val = numpy.array(labels[train_samples:train_samples + val_samples],
                            dtype=numpy.float32)

    # Test set
    image_test = numpy.array(images[train_samples + val_samples:],
                             dtype=numpy.float32)
    image_test = image_test.reshape(test_samples, width, height, 1)

    label_test = numpy.array(labels[train_samples + val_samples:],
                             dtype=numpy.float32)

    options = dict(dtype=dtypes.float32, reshape=True, seed=None)

    train = mnist.DataSet(image_train, label_train, **options)
    validation = mnist.DataSet(image_val, label_val, **options)
    test = mnist.DataSet(image_test, label_test, **options)

    return base.Datasets(train=train, validation=validation, test=test)
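The scale helper used here is not shown; a minimal sketch using scipy.ndimage.zoom (spline interpolation is an assumption, any resampling that maps 16x16 to 28x28 would do):

from scipy.ndimage import zoom

def scale(image, new_width, new_height):
    # Resize a 2-D array to (new_height, new_width).
    zy = float(new_height) / image.shape[0]
    zx = float(new_width) / image.shape[1]
    return zoom(image, (zy, zx))

With that in place, read_semeion() splits the 1593 rows of the UCI Semeion file into the 1400/120/73 train/validation/test sets built above.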
Example 8
def read_opt(fname='optical/optdigits_csv.csv'):
    with open(fname, 'r') as f:
        lines = f.readlines()
    lines = lines[1:]  # skip the CSV header row
    width = 8
    height = 8
    size = width * height
    classes = 10

    images = []
    labels = []
    fnumber = 0

    for line in lines:
        data = line.split(',')
        image = []

        for i in range(0, size):
            image.append(int(float(data[i])))
        images.append(image)

        label = numpy.zeros((10, ))
        label[int(data[-1])] = 1
        labels.append(label)

        fnumber += 1

    images_scale = [None] * len(images)
    for i in range(len(images)):
        im_8 = numpy.reshape(images[i], (8, 8))
        im_reshape = scale(im_8, 28, 28)
        images_scale[i] = numpy.reshape(im_reshape, -1)

    images = images_scale

    # Shuffle data
    images_shuffle = []
    labels_shuffle = []
    indexes = list(range(len(images)))
    random.shuffle(indexes)
    for i in indexes:
        images_shuffle.append(images[i])
        labels_shuffle.append(labels[i])

    images = images_shuffle
    labels = labels_shuffle

    samples = len(images)

    width = 28
    height = 28

    train_samples = 1400
    val_samples = 1400
    test_samples = 2800

    # Train set
    image_train = numpy.array(images[:train_samples], dtype=numpy.float32)
    image_train = image_train.reshape(train_samples, width, height, 1)
    label_train = numpy.array(labels[:train_samples], dtype=numpy.float32)

    # Validation Set
    image_val = numpy.array(images[train_samples:train_samples + val_samples],
                            dtype=numpy.float32)
    image_val = image_val.reshape(val_samples, width, height, 1)
    label_val = numpy.array(labels[train_samples:train_samples + val_samples],
                            dtype=numpy.float32)

    # Test set
    image_test = numpy.array(images[train_samples + val_samples:train_samples +
                                    val_samples + test_samples],
                             dtype=numpy.float32)
    image_test = image_test.reshape(test_samples, width, height, 1)
    label_test = numpy.array(labels[train_samples + val_samples:train_samples +
                                    val_samples + test_samples],
                             dtype=numpy.float32)

    options = dict(dtype=dtypes.float32, reshape=True, seed=None)

    train = mnist.DataSet(image_train, label_train, **options)
    validation = mnist.DataSet(image_val, label_val, **options)
    test = mnist.DataSet(image_test, label_test, **options)

    return base.Datasets(train=train, validation=validation, test=test)
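Usage mirrors the Semeion reader, assuming the optdigits CSV (a header row, then one flattened 8x8 digit per line with the class label in the last column) is in place:

opt = read_opt('optical/optdigits_csv.csv')
print(opt.train.num_examples, opt.test.num_examples)  # 1400 and 2800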
Example 9
def fake():
    # one_hot, dtype and seed are free variables here: in the original
    # tensorflow mnist.py this helper is nested inside read_data_sets.
    return mnist.DataSet([], [],
                         fake_data=True,
                         one_hot=one_hot,
                         dtype=dtype,
                         seed=seed)
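In the stock tensorflow mnist.py this helper short-circuits the real file reading when fake data is requested, roughly as follows:

if fake_data:
    train = fake()
    validation = fake()
    test = fake()
    return base.Datasets(train=train, validation=validation, test=test)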