def mnist_common_generator(tmp_dir,
                           training,
                           how_many,
                           data_filename,
                           label_filename,
                           start_from=0):
  """Builds an image generator over a shuffled window of MNIST-format data.

  Args:
    tmp_dir: directory that holds the data and label files.
    training: Boolean; when true 60000 examples are extracted, otherwise
      10000.
    how_many: number of (image, label) pairs to hand to the generator.
    data_filename: name of the features file inside tmp_dir.
    label_filename: name of the labels file inside tmp_dir.
    start_from: index (into the shuffled data) of the first pair to use.

  Returns:
    The result of image_utils.image_generator for the selected images and
    labels.
  """
  data_path = os.path.join(tmp_dir, data_filename)
  labels_path = os.path.join(tmp_dir, label_filename)
  num_examples = 60000 if training else 10000
  images = _extract_mnist_images(data_path, num_examples)
  labels = _extract_mnist_labels(labels_path, num_examples)

  # Shuffle images and labels together so the classes end up well mixed.
  # NOTE(review): no seed is set here, so every call produces its own
  # permutation -- confirm callers do not rely on disjoint start_from windows.
  pairs = list(zip(images, labels))
  random.shuffle(pairs)
  images, labels = list(zip(*pairs))

  window = slice(start_from, start_from + how_many)
  return image_utils.image_generator(images[window], labels[window])
def mnist_common_generator(tmp_dir, training, how_many, data_filename,
                           label_filename, start_from=0):
  """Image generator for MNIST-format data.

  Args:
    tmp_dir: path to the directory containing the data files.
    training: a Boolean; if true, 60000 examples are read, otherwise 10000.
    how_many: how many images and labels to generate.
    data_filename: file (inside tmp_dir) that contains the features data.
    label_filename: file (inside tmp_dir) that contains the labels.
    start_from: position in the shuffled data at which to start.

  Returns:
    An instance of image_generator that produces the selected images and
    labels.
  """
  data_path = os.path.join(tmp_dir, data_filename)
  labels_path = os.path.join(tmp_dir, label_filename)
  if training:
    example_count = 60000
  else:
    example_count = 10000
  images = _extract_mnist_images(data_path, example_count)
  labels = _extract_mnist_labels(labels_path, example_count)

  # Shuffle the (image, label) pairs so that classes are well distributed.
  # NOTE(review): the shuffle is unseeded, so each call draws a different
  # order -- verify this is acceptable for callers that pass start_from.
  shuffled = list(zip(images, labels))
  random.shuffle(shuffled)
  images, labels = list(zip(*shuffled))

  end = start_from + how_many
  selected_images = images[start_from:end]
  selected_labels = labels[start_from:end]
  return image_utils.image_generator(selected_images, selected_labels)
def cifar10_generator(tmp_dir, training, how_many, start_from=0):
  """Image generator for CIFAR-10.

  Args:
    tmp_dir: path to temporary storage directory.
    training: a Boolean; if true, we use the train set, otherwise the test set.
    how_many: how many images and labels to generate.
    start_from: from which image to start.

  Returns:
    An instance of image_generator that produces CIFAR-10 images and labels.
  """
  _get_cifar10(tmp_dir)
  data_files = _CIFAR10_TRAIN_FILES if training else _CIFAR10_TEST_FILES
  all_images, all_labels = [], []
  for filename in data_files:
    path = os.path.join(tmp_dir, _CIFAR10_PREFIX, filename)
    # Pickle data is binary: it must be read in "rb" mode, otherwise loading
    # fails on Python 3 (text mode tries to decode the bytes).
    with tf.gfile.Open(path, "rb") as f:
      if six.PY2:
        data = cPickle.load(f)
      else:
        # Same handling as cifar_generator: on Python 3 the batches are
        # loaded with latin1 so their 8-bit strings (and the pickled numpy
        # arrays) decode.
        data = cPickle.load(f, encoding="latin1")
    images = data["data"]
    num_images = images.shape[0]
    # Each row is a flat image; reshape to channel-first, then move the
    # channel axis last so every image is (size, size, 3).
    images = images.reshape(
        (num_images, 3, _CIFAR10_IMAGE_SIZE, _CIFAR10_IMAGE_SIZE))
    all_images.extend([
        np.squeeze(images[j]).transpose((1, 2, 0)) for j in range(num_images)
    ])
    labels = data["labels"]
    all_labels.extend([labels[j] for j in range(num_images)])
  return image_utils.image_generator(
      all_images[start_from:start_from + how_many],
      all_labels[start_from:start_from + how_many])
def cifar_generator(cifar_version, tmp_dir, training, how_many, start_from=0):
  """Image generator for CIFAR-10 and CIFAR-100.

  Args:
    cifar_version: string; one of "cifar10", "cifar100" or "cifar20".
      "cifar100" and "cifar20" both read the CIFAR-100 files; they differ only
      in the label field used ("fine_labels" vs. "coarse_labels").
    tmp_dir: path to temporary storage directory.
    training: a Boolean; if true, we use the train set, otherwise the test set.
    how_many: how many images and labels to generate.
    start_from: from which image to start.

  Returns:
    An instance of image_generator that produces CIFAR images and labels.

  Raises:
    ValueError: if cifar_version is not one of the supported values.
  """
  if cifar_version == "cifar10":
    url = _CIFAR10_URL
    train_files = _CIFAR10_TRAIN_FILES
    test_files = _CIFAR10_TEST_FILES
    prefix = _CIFAR10_PREFIX
    image_size = _CIFAR10_IMAGE_SIZE
    label_key = "labels"
  elif cifar_version == "cifar100" or cifar_version == "cifar20":
    url = _CIFAR100_URL
    train_files = _CIFAR100_TRAIN_FILES
    test_files = _CIFAR100_TEST_FILES
    prefix = _CIFAR100_PREFIX
    image_size = _CIFAR100_IMAGE_SIZE
    if cifar_version == "cifar100":
      label_key = "fine_labels"
    else:
      label_key = "coarse_labels"
  else:
    # Without this branch an unknown version only surfaced later as an
    # unbound-local error on `url`, which hid the real cause.
    raise ValueError(
        "Unsupported cifar_version %r; expected one of 'cifar10', 'cifar100' "
        "or 'cifar20'." % (cifar_version,))

  _get_cifar(tmp_dir, url)
  data_files = train_files if training else test_files
  all_images, all_labels = [], []
  for filename in data_files:
    path = os.path.join(tmp_dir, prefix, filename)
    with tf.gfile.Open(path, "rb") as f:
      if six.PY2:
        data = cPickle.load(f)
      else:
        # latin1 lets Python 3 decode the 8-bit strings stored in the pickle.
        data = cPickle.load(f, encoding="latin1")
    images = data["data"]
    num_images = images.shape[0]
    # Rows are flat images: reshape to channel-first, then move channels last.
    images = images.reshape((num_images, 3, image_size, image_size))
    all_images.extend([
        np.squeeze(images[j]).transpose((1, 2, 0)) for j in range(num_images)
    ])
    labels = data[label_key]
    all_labels.extend([labels[j] for j in range(num_images)])
  return image_utils.image_generator(
      all_images[start_from:start_from + how_many],
      all_labels[start_from:start_from + how_many])
def cifar_generator(cifar_version, tmp_dir, training, how_many, start_from=0):
  """Image generator for CIFAR-10 and CIFAR-100.

  Args:
    cifar_version: string; one of "cifar10", "cifar100" or "cifar20". The
      last two share the CIFAR-100 files and select the "fine_labels" and
      "coarse_labels" field respectively.
    tmp_dir: path to temporary storage directory.
    training: a Boolean; if true, we use the train set, otherwise the test set.
    how_many: how many images and labels to generate.
    start_from: from which image to start.

  Returns:
    An instance of image_generator that produces CIFAR images and labels.

  Raises:
    ValueError: if cifar_version is not a supported value.
  """
  # Reject unknown versions immediately; previously they fell through the
  # branches below and failed later with an unbound-local error on `url`.
  if cifar_version not in ("cifar10", "cifar100", "cifar20"):
    raise ValueError(
        "Unsupported cifar_version %r; expected one of 'cifar10', 'cifar100' "
        "or 'cifar20'." % (cifar_version,))

  if cifar_version == "cifar10":
    url = _CIFAR10_URL
    train_files = _CIFAR10_TRAIN_FILES
    test_files = _CIFAR10_TEST_FILES
    prefix = _CIFAR10_PREFIX
    image_size = _CIFAR10_IMAGE_SIZE
    label_key = "labels"
  else:
    url = _CIFAR100_URL
    train_files = _CIFAR100_TRAIN_FILES
    test_files = _CIFAR100_TEST_FILES
    prefix = _CIFAR100_PREFIX
    image_size = _CIFAR100_IMAGE_SIZE
    if cifar_version == "cifar100":
      label_key = "fine_labels"
    else:
      label_key = "coarse_labels"

  _get_cifar(tmp_dir, url)
  data_files = train_files if training else test_files
  all_images, all_labels = [], []
  for filename in data_files:
    path = os.path.join(tmp_dir, prefix, filename)
    with tf.gfile.Open(path, "rb") as f:
      if six.PY2:
        data = cPickle.load(f)
      else:
        # latin1 lets Python 3 decode the 8-bit strings stored in the pickle.
        data = cPickle.load(f, encoding="latin1")
    images = data["data"]
    num_images = images.shape[0]
    # Rows are flat images: reshape to channel-first, then move channels last.
    images = images.reshape((num_images, 3, image_size, image_size))
    all_images.extend([
        np.squeeze(images[j]).transpose((1, 2, 0)) for j in range(num_images)
    ])
    labels = data[label_key]
    all_labels.extend([labels[j] for j in range(num_images)])
  return image_utils.image_generator(
      all_images[start_from:start_from + how_many],
      all_labels[start_from:start_from + how_many])
def testImageGenerator(self):
  """Round-trips two random images through image_utils.image_generator."""
  np.random.seed(1111)  # Fixed seed keeps the test deterministic.
  source_images = [
      np.random.randint(0, 255, size=(10, 12, 3)) for _ in range(2)
  ]
  source_labels = [1, 2]
  expected_keys = [
      "image/class/label", "image/encoded", "image/format", "image/height",
      "image/width"
  ]

  # Run the generator and collect the encoded images and labels it emits.
  encoded_imgs, labels = [], []
  for example in image_utils.image_generator(source_images, source_labels):
    self.assertEqual(sorted(example), expected_keys)
    self.assertEqual(example["image/format"], ["png"])
    # For the (10, 12, 3) inputs this test expects height 12 and width 10.
    self.assertEqual(example["image/height"], [12])
    self.assertEqual(example["image/width"], [10])
    encoded_imgs.append(example["image/encoded"])
    labels.append(example["image/class/label"])

  # The emitted labels must match the inputs, each wrapped in a list.
  self.assertEqual(len(labels), 2)
  for emitted, wanted in zip(labels, source_labels):
    self.assertEqual(emitted, [wanted])

  # Decoding the emitted PNGs must give back the original pixel values.
  self.assertEqual(len(encoded_imgs), 2)
  png_t = tf.placeholder(dtype=tf.string)
  decoded_t = tf.image.decode_png(png_t)
  with self.test_session() as sess:
    for encoded, original in zip(encoded_imgs, source_images):
      self.assertEqual(len(encoded), 1)
      decoded = sess.run(decoded_t, feed_dict={png_t: encoded[0]})
      self.assertAllClose(decoded, original)
def gen():
    """Yield one ``image_utils.image_generator`` per sampled, labelled image.

    Lists ``data_dir``, keeps the files whose name (up to the first ".") has a
    ``landmark_id`` in the label CSV, shuffles them, keeps at most
    ``nb_images`` and yields a generator for each sampled image with its
    label.

    ``data_dir``, ``LABEL_FILE`` and ``nb_images`` come from the enclosing
    scope. Paths are built by plain concatenation, so ``data_dir`` must end
    with a path separator.
    """
    files = os.listdir(data_dir)
    label_df = pd.read_csv(data_dir + LABEL_FILE)[['id', 'landmark_id']]
    label_df.set_index('id', drop=True, inplace=True)
    label_dict = label_df.to_dict()['landmark_id']

    samples = []
    for _file in files:
        if _file == LABEL_FILE:
            continue
        label = label_dict.get(_file.split('.')[0])
        # Compare with None explicitly: a label of 0 is falsy and would
        # otherwise be discarded.
        if label is not None:
            samples.append((_file, label))

    # sampling
    random.shuffle(samples)
    samples = samples[:nb_images]
    print(len(samples))

    # Read images from the *sampled* names so each stays paired with its own
    # label, and only the selected files are loaded (one at a time).
    for image_name, label in samples:
        image = plt.imread(data_dir + image_name)
        # image_generator takes a list of images and a list of labels.
        # NOTE(review): this still yields one generator per image, as before;
        # confirm whether the consumer expects the example dicts themselves.
        yield image_utils.image_generator([image], [label])
def testImageGenerator(self):
  """Checks image_generator output features and the PNG round trip."""
  # Seed numpy so the two random images are reproducible.
  np.random.seed(1111)
  image1 = np.random.randint(0, 255, size=(10, 12, 3))
  image2 = np.random.randint(0, 255, size=(10, 12, 3))
  inputs = (image1, image2)

  # Feed both images, labelled 1 and 2, through the generator.
  encoded_imgs = []
  labels = []
  generated = image_utils.image_generator([image1, image2], [1, 2])
  for features in generated:
    feature_names = sorted(features)
    self.assertEqual(feature_names, [
        "image/class/label", "image/encoded", "image/format", "image/height",
        "image/width"
    ])
    self.assertEqual(features["image/format"], ["png"])
    # For the (10, 12, 3) inputs this test expects height 12 and width 10.
    self.assertEqual(features["image/height"], [12])
    self.assertEqual(features["image/width"], [10])
    encoded_imgs.append(features["image/encoded"])
    labels.append(features["image/class/label"])

  # Labels come back in input order, each as a one-element list.
  self.assertEqual(len(labels), 2)
  for position, wanted in enumerate([1, 2]):
    self.assertEqual(labels[position], [wanted])

  # Decode every PNG and compare it against the image it was built from.
  self.assertEqual(len(encoded_imgs), 2)
  image_t = tf.placeholder(dtype=tf.string)
  decoded_png_t = tf.image.decode_png(image_t)
  with self.test_session() as sess:
    for position, original in enumerate(inputs):
      encoded_img = encoded_imgs[position]
      self.assertEqual(len(encoded_img), 1)
      decoded = sess.run(decoded_png_t, feed_dict={image_t: encoded_img[0]})
      self.assertAllClose(decoded, original)