def input_fn():
    """Build the image dataset and the caption datasets.

    Reads ``caps``, ``filenames``, ``is_distort`` and ``self.word_to_idx``
    from the enclosing scope.

    Returns:
        A tuple ``(image_dataset, caption_structure)``. ``caption_structure``
        is a dict with the raw caption dataset under ``"raw"`` and the
        index-encoded caption dataset under ``"index"``.
    """
    raw_captions = Dataset.from_tensor_slices(list(caps))
    image_files = Dataset.from_tensor_slices(filenames)

    def my_split(text):
        """Encode one UTF-8 caption as <START> + per-character ids + <END>."""
        vocab = self.word_to_idx
        chars = text.decode("utf-8")
        # todo: take care of the unknown character (it falls back to id 0).
        body = [vocab.get(ch, 0) for ch in chars]
        wrapped = [vocab['<START>']] + body + [vocab['<END>']]
        return np.array(wrapped, dtype=np.int32)

    def to_indices(text):
        # todo: tf has issue with `tf.string_split` with unicode
        # https://github.com/tensorflow/tensorflow/issues/11399
        # so `py_func` is used here.
        return tf.py_func(my_split, [text], tf.int32)

    encoded_captions = raw_captions.map(to_indices, num_threads=8)
    decode_fn = get_decode_image_fn(is_training=is_distort)
    image_dataset = image_files.map(decode_fn, num_threads=8)

    caption_structure = {}
    caption_structure["raw"] = raw_captions
    caption_structure["index"] = encoded_captions
    return image_dataset, caption_structure
def mnist():
    """Load MNIST and wrap the train and test splits as Datasets.

    The data is read from the ``MNIST_data`` directory with one-hot labels.

    Returns:
        A tuple ``(train_dataset, test_dataset)``; each dataset yields
        ``(image, label)`` pairs.
    """
    # Named `data_sets` so the local does not shadow this function's name.
    data_sets = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Use the public `images` / `labels` accessors rather than the private
    # `_images` / `_labels` attributes they wrap.
    train_dataset = Dataset.from_tensor_slices(
        (data_sets.train.images, data_sets.train.labels))
    test_dataset = Dataset.from_tensor_slices(
        (data_sets.test.images, data_sets.test.labels))
    return train_dataset, test_dataset
def getimage(image, batch_size, trainnum=2000, testnum=500,
             fid_root=r'D:\360download\FIDS30'):
    """Build batched train/test data for the 'FID' or 'cifar10' image set.

    Args:
        image: Name of the data set, either ``'FID'`` or ``'cifar10'``.
        batch_size: Number of samples per batch.
        trainnum: Number of leading CIFAR-10 training samples to keep.
        testnum: Number of leading CIFAR-10 test samples to keep.
        fid_root: Root directory walked for the 'FID' data set. Every leaf
            directory below it is treated as one class.

    Returns:
        A tuple ``(tr_data, test_data, classnum)``. For any other value of
        ``image`` the function returns ``None``, as it always has.
    """
    if image == 'FID':
        train_image, train_label = [], []
        test_image, test_label = [], []
        classnum = 0
        for dirpath, dirnames, _ in os.walk(fid_root):
            if dirnames:
                # Only leaf directories hold the images of one class.
                continue
            imagepath = glob.glob(os.path.join(dirpath, '*.jpg'))
            # Hold out the last five images of the class for testing and
            # train on the rest. The previous indexing (i - 6) picked
            # elements -6..-2, leaking one training image into the test
            # split and never using the final image.
            training_paths = imagepath[:-5]
            held_out_paths = imagepath[-5:]
            train_image.extend(training_paths)
            train_label.extend([classnum] * len(training_paths))
            test_image.extend(held_out_paths)
            test_label.extend([classnum] * len(held_out_paths))
            classnum += 1

        # Run the training paths through the image generator.
        tr_data = ImageDataGenerator(images=train_image,
                                     labels=train_label,
                                     batch_size=batch_size,
                                     num_classes=classnum)
        # Run the test paths through the image generator, unshuffled.
        test_data = ImageDataGenerator(images=test_image,
                                       labels=test_label,
                                       batch_size=batch_size,
                                       num_classes=classnum,
                                       shuffle=False)
        return tr_data.data, test_data.data, classnum

    if image == 'cifar10':
        cifar10_dir = 'cifar-10-batches-py'
        # Load the CIFAR-10 arrays.
        X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
        # Plain slices keep the leading samples and tolerate a request
        # larger than the data set.
        train_image, train_label = X_train[:trainnum], y_train[:trainnum]
        test_image, test_label = X_test[:testnum], y_test[:testnum]
        classnum = 10

        tr_data = Dataset.from_tensor_slices((train_image, train_label))
        tr_data = tr_data.map(resize).batch(batch_size)
        test_data = Dataset.from_tensor_slices((test_image, test_label))
        test_data = test_data.map(resize).batch(batch_size)
        return tr_data, test_data, classnum

    # Unknown data set name: keep the historical behaviour of returning None.
    return None
def configure_dataset():
    """Feed training image paths through a random shuffle queue.

    Globs ``*.jpg`` under ``FLAGS.dataset_path/ch4_training_images``, pushes
    the paths into a ``tf.RandomShuffleQueue`` with two enqueue ops and
    registers the queue runner.

    :returns: the op ``queue.dequeue_many(FLAGS.batch_size)``.
    """
    logger = get_logger()

    pattern = os.path.join(FLAGS.dataset_path, "ch4_training_images", "*.jpg")
    image_list = list(glob.glob(pattern))

    image_list_op = tf.constant(image_list)
    logger.debug("image_list_op: {}".format(image_list_op))

    path_dataset = Dataset.from_tensor_slices(image_list_op)
    path_iterator = path_dataset.make_one_shot_iterator()
    next_images = path_iterator.get_next()

    #: Create a random shuffle queue.
    queue_capacity = 20
    queue = tf.RandomShuffleQueue(
        capacity=queue_capacity,
        min_after_dequeue=int(0.9 * queue_capacity),
        shapes=next_images.shape,
        dtypes=next_images.dtype)

    #: Create an op to enqueue one item.
    enqueue = queue.enqueue(next_images)

    #: Create a queue runner driving two copies of the enqueue op.
    runner = tf.train.QueueRunner(queue, [enqueue, enqueue])
    tf.train.add_queue_runner(runner)

    return queue.dequeue_many(FLAGS.batch_size)
def do_without_placeholder():
    """Train a linear model straight from the iterator output tensors.

    Reads the module-level ``data`` and ``label``, batches them by three and
    wires the iterator output directly into the model. Prints ``W`` before
    training and after each of two training steps, then prints two further
    batches of inputs.
    """
    global data, label

    batched = Dataset.from_tensor_slices((data, label)).batch(3)
    iterator = batched.make_initializable_iterator()
    batch_X, batch_y = iterator.get_next()

    W = tf.Variable([[0], [0]], dtype=tf.float32)
    b = tf.Variable([0], dtype=tf.float32)
    y = tf.matmul(batch_X, W) + b
    loss = tf.losses.mean_squared_error(batch_y, y)
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

    with tf.Session() as sess:
        sess.run(iterator.initializer)
        sess.run(tf.global_variables_initializer())

        print(sess.run(W))
        for _ in range(2):
            # Each training step pulls a fresh batch from the iterator.
            sess.run(train_step)
            print(sess.run(W))

        for _ in range(2):
            print(sess.run(batch_X))
def __init__(self, txt_file, mode, batch_size, num_classes, shuffle=True,
             buffer_size=1000):
    """Create the batched image dataset described by a text file.

    Args:
        txt_file: Path of the TXT file. The file holds many lines; each
            line contains an image path and its class.
        mode: ``'training'`` or ``'inference'``; selects the parse function.
        batch_size: Number of samples per batch.
        num_classes: Number of classes in the dataset.
        shuffle: Whether to shuffle the file/label lists and the dataset.
        buffer_size: Buffer size used by the dataset shuffle.

    Raises:
        ValueError: If ``mode`` is neither ``'training'`` nor
            ``'inference'``.
    """
    self.txt_file = txt_file
    self.num_classes = num_classes

    self._read_txt_file()
    self.data_size = len(self.labels)

    if shuffle:
        self._shuffle_lists()

    self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
    self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)
    data = Dataset.from_tensor_slices((self.img_paths, self.labels))

    if mode == 'training':
        parse_fn = self._parse_function_train
    elif mode == 'inference':
        parse_fn = self._parse_function_inference
    else:
        # The message previously said "Invalid model", which misnamed the
        # offending argument.
        raise ValueError("Invalid mode '{}'.".format(mode))
    data = data.map(parse_fn, num_threads=8,
                    output_buffer_size=100 * batch_size)

    if shuffle:
        data = data.shuffle(buffer_size=buffer_size)

    self.data = data.batch(batch_size)
def __init__(self, txt_file, mode, batch_size=1, shuffle=True,
             buffer_size=1000):
    """Create a batched dataset of (image, label) string pairs.

    Args:
        txt_file: Text file handed to ``self._read_txt_file``.
        mode: ``'training'`` or ``'inference'``; selects the parse function.
        batch_size: Number of samples per batch.
        shuffle: Whether to shuffle the lists and the dataset.
        buffer_size: Buffer size used by the dataset shuffle.

    Raises:
        ValueError: If ``mode`` is not one of the two supported values.
    """
    self._read_txt_file(txt_file)
    self.data_size = len(self.images)

    if shuffle:
        self._shuffle_lists()

    # Both the images and the labels are stored as string tensors.
    image_tensor = convert_to_tensor(self.images, dtype=dtypes.string)
    self.images = image_tensor
    label_tensor = convert_to_tensor(self.labels, dtype=dtypes.string)
    self.labels = label_tensor

    pairs = Dataset.from_tensor_slices((self.images, self.labels))

    # Pick the parse function for train or inference.
    if mode == 'training':
        parse_fn = self._parse_function_train
    elif mode == 'inference':
        parse_fn = self._parse_function_inference
    else:
        raise ValueError("Invalid mode '%s'." % (mode))
    pairs = pairs.map(parse_fn, num_threads=8,
                      output_buffer_size=100 * batch_size)

    if shuffle:
        pairs = pairs.shuffle(buffer_size=buffer_size)

    # Group the parsed samples into batches.
    self.data = pairs.batch(batch_size)
def _count_num_records(self):
    """Count the records produced by the data files.

    Counts the number of non-empty lines (the data samples) from the
    data_files. This function is called from get_size the first time.

    Other TensorFlow errors used to be swallowed by a bare ``except`` and
    produced a silently truncated count; they now propagate.

    :return int: the number of non-empty lines in the data_files
    """
    # TODO in TF 1.3 use: dataset = Dataset.list_files(self.data_files_pattern).repeat(1)
    from tensorflow.python.ops import gen_io_ops
    file_dataset = Dataset.from_tensor_slices(
        gen_io_ops.matching_files(self.data_files_pattern)).repeat(1)
    files = self._read_files_once(file_dataset)

    samples = 0
    # Count inside a throw-away graph so the caller's graph is not touched.
    with tf.Graph().as_default():
        dataset = self.dataset_class(files).repeat(1)
        next_element = dataset.make_one_shot_iterator().get_next()
        with tf.Session() as sess:
            try:
                while True:
                    sess.run(next_element)
                    samples += 1
            except tf.errors.OutOfRangeError:
                # The one-shot iterator is exhausted: counting is done.
                pass
    return samples
def train_inputs():
    """Build the training input tensors under the ``Train_Data`` scope.

    The dataset is fed through placeholders. The iterator is initialized
    later by ``iterator_initializer_hook``, which feeds ``features`` and
    ``captions`` from the enclosing scope.

    Returns:
        A tuple ``(next_video, next_caption)`` of iterator output tensors.
    """
    with tf.name_scope("Train_Data"):
        #nonlocal X
        #nonlocal y
        video_shape = [None, FLAGS.max_video_length, FLAGS.frame_dim]
        input_placeholder = tf.placeholder(tf.float32, video_shape)
        output_placeholder = tf.placeholder(tf.int32, [None, None])

        train_data = (
            Dataset.from_tensor_slices((input_placeholder, output_placeholder))
            .repeat(None)
            .shuffle(buffer_size=1450)
            .batch(FLAGS.batch_size))

        iterator = train_data.make_initializable_iterator()
        next_video, next_caption = iterator.get_next()

        # These ops exist only to attach names to the first elements.
        tf.identity(next_video[0], "video_0")
        tf.identity(next_caption[0], "caption_0")

        def initialize_iterator(sess):
            """Run the iterator initializer with the real data fed in."""
            feed = {input_placeholder: features,
                    output_placeholder: captions}
            return sess.run(iterator.initializer, feed_dict=feed)

        # Set the run hook that initializes the iterator.
        iterator_initializer_hook.iterator_initializer_func = initialize_iterator
        return next_video, next_caption
def infer():
    """Restore the model from the last checkpoint and print predictions.

    Builds the inference model inside ``infer_graph`` on the module-level
    ``novel_data_X`` / ``novel_data_y``, restores ``/tmp/model.ckpt`` and
    prints one prediction per sample until the iterator is exhausted.
    """
    global novel_data_X, novel_data_y
    global infer_graph

    # 1. Build model structure for inference
    with infer_graph.as_default():
        samples = Dataset.from_tensor_slices((novel_data_X, novel_data_y))
        iterator = samples.batch(1).make_one_shot_iterator()
        _, pred = build_model(iterator, mode=ModeKeys.INFER)
        saver = tf.train.Saver(tf.global_variables())

    with tf.Session(graph=infer_graph) as sess:
        # 2. Load model variables (from the last checkpoint)
        sess.run(tf.global_variables_initializer())
        print('Original W1: {}'.format(sess.run('W1:0')))
        saver.restore(sess, '/tmp/model.ckpt')
        print(' Loaded W1: {}'.format(sess.run('W1:0')))

        # 3. predict every data
        try:
            while True:
                print(sess.run(pred))
        except tf.errors.OutOfRangeError:
            # Every sample has been consumed.
            pass

    print('Inference is DONE')
def testMultipleIteratorsOnADatasetThatUsesFunctions(self):
    """Two iterators over one mapped dataset yield the same squares."""
    squared = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])
    squared = squared.map(math_ops.square)

    first_pass = [elem.numpy() for elem in datasets.Iterator(squared)]
    self.assertAllEqual([1, 4, 9, 16, 25, 36], first_pass)

    # A second, independent iterator must reproduce the first pass.
    second_pass = [elem.numpy() for elem in datasets.Iterator(squared)]
    self.assertAllEqual(first_pass, second_pass)
def train_input_fn(features, labels, batch_size):
    """Input function for training regression models.

    The examples are only batched here; no shuffling or repetition is
    applied, so the iterator is exhausted after one pass over the data.

    Args:
        features: A dict containing the name of each feature as key and the
            respective numpy arrays as values (first output of transform_fn)
        labels: A 1-dimensional numpy array containing only the price
            values for the target period (second output of transform_fn)
        batch_size: An integer value for the size of each batch

    Returns:
        The ``get_next()`` output of a one-shot iterator over the batched
        ``(features, labels)`` dataset.

    Example:
        regressor.train(input_fn=lambda: train_input_fn(feature_X, label_y, 10),
                        steps=100)
    """
    # Convert the inputs to a Dataset (the feature mapping is copied).
    slices = Dataset.from_tensor_slices((dict(features), labels))

    # Batch the examples.
    batched = slices.batch(batch_size)

    # Build the Iterator, and return the read end of the pipeline.
    iterator = batched.make_one_shot_iterator()
    return iterator.get_next()
def __init__(self, txt_file, mode, batch_size, num_classes, shuffle=True,
             buffer_size=1000):
    """Create the batched (image path, label) dataset for one mode.

    Args:
        txt_file: Text file read by ``self._read_txt_file``.
        mode: ``'training'`` or ``'inference'``; selects the parse function.
        batch_size: Number of samples per batch.
        num_classes: Number of classes in the dataset.
        shuffle: Whether to shuffle the lists and the dataset.
        buffer_size: Buffer size used by the dataset shuffle.

    Raises:
        ValueError: If ``mode`` is not one of the two supported values.
    """
    self.txt_file, self.num_classes = txt_file, num_classes

    self._read_txt_file()
    self.data_size = len(self.labels)

    if shuffle:
        self._shuffle_lists()

    self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
    self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

    samples = Dataset.from_tensor_slices((self.img_paths, self.labels))

    if mode == 'training':
        parse_fn = self._parse_function_train
    elif mode == 'inference':
        parse_fn = self._parse_function_inference
    else:
        raise ValueError("Invalid mode '%s'." % (mode))
    samples = samples.map(parse_fn)

    if shuffle:
        samples = samples.shuffle(buffer_size=buffer_size)

    self.data = samples.batch(batch_size)
def train():
    """Train the model on the module-level data and save a checkpoint.

    Builds the training model inside ``train_graph``, runs the minimize op
    until the dataset iterator is exhausted, then saves the variables to
    ``/tmp/model.ckpt`` and prints the trained ``W1``.
    """
    global data_X, data_y
    global train_graph

    # 1. Build model structure for training
    with train_graph.as_default():
        tf.set_random_seed(1)
        pipeline = (
            Dataset.from_tensor_slices((data_X, data_y))
            .shuffle(buffer_size=64, seed=1)
            .repeat(500)
            .batch(4))
        iterator = pipeline.make_one_shot_iterator()
        minimize_op, _ = build_model(iterator, mode=ModeKeys.TRAIN)
        saver = tf.train.Saver(tf.global_variables())

    with tf.Session(graph=train_graph) as sess:
        # 2. Do training via gradient descent
        sess.run(tf.global_variables_initializer())
        try:
            while True:
                sess.run(minimize_op)
        except tf.errors.OutOfRangeError:
            # All repeated epochs have been consumed.
            pass

        # 3. Save model (variables)
        saver.save(sess, '/tmp/model.ckpt')
        print(' Trained W1: {}'.format(sess.run('W1:0')))

    print('Training is DONE')
def predict(num):
    """Return the average first-column probability over the predict data.

    Previously only the first batch of ten samples was evaluated while the
    sum was still divided by ``num``, which under-reported the score
    whenever more than ten samples were prepared. Every batch is now
    consumed. Results for ``num <= 10`` are unchanged.

    Args:
        num: Number of samples requested from
            ``prepare_nn_data('predict', num)``; also the divisor of the
            average.

    Returns:
        The sum of ``probs[:, 0]`` over all batches divided by ``num``.
    """
    data_X, data_Y = prepare_nn_data('predict', num)
    predict_graph = tf.Graph()

    # build model structure for prediction
    with predict_graph.as_default():
        dataset = Dataset.from_tensor_slices((data_X, data_Y))
        dataset = dataset.map(_parse_function)
        dataset = dataset.batch(10)
        iterator = dataset.make_one_shot_iterator()

        model = Model(training=False)
        _, probs = model.build(iterator)

        # define saver
        saver = tf.train.Saver(tf.global_variables())

    total_score = 0.0
    # start a session to run the restored model
    with tf.Session(graph=predict_graph) as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, 'vgg/model_file/model')

        try:
            while True:
                for row in sess.run(probs):
                    total_score += row[0]
        except tf.errors.OutOfRangeError:
            # The one-shot iterator has delivered every batch.
            pass

    return total_score / num
def __init__(self, images, labels, batch_size, num_classes, shuffle=True,
             buffer_size=1000):
    """Create a batched dataset from in-memory image paths and labels.

    Args:
        images: Image paths.
        labels: Integer class labels, parallel to ``images``.
        batch_size: Number of samples per batch.
        num_classes: Number of classes in the dataset.
        shuffle: Whether to shuffle the path and label lists up front.
        buffer_size: Accepted but not used by this constructor.
    """
    self.img_paths, self.labels = images, labels
    self.num_classes = num_classes
    self.data_size = len(self.labels)
    self.pointer = 0

    if shuffle:
        self._shuffle_lists()

    path_tensor = convert_to_tensor(self.img_paths, dtype=dtypes.string)
    self.img_paths = path_tensor
    label_tensor = convert_to_tensor(self.labels, dtype=dtypes.int32)
    self.labels = label_tensor

    samples = Dataset.from_tensor_slices((self.img_paths, self.labels))
    # The training parse function is applied to every sample.
    samples = samples.map(self._parse_function_train,
                          num_threads=8,
                          output_buffer_size=100 * batch_size)
    self.data = samples.batch(batch_size)
def gen_noise_dataset(mnist_dataset):
    """Build a TF Dataset that adds uniform noise as an 11th class.

    :param mnist_dataset: mnist.DataSet, which has attributes images/labels
    :return: TF (API) Dataset of the original samples followed by the
        noise samples, with 11-wide labels
    """
    sample_count = mnist_dataset.num_examples
    feature_n = mnist_dataset.images.shape[1]

    # Noise samples: random features, label fixed to the extra class.
    noisy_X = np.random.rand(sample_count, feature_n)
    noisy_y = np.zeros((sample_count, 11))
    noisy_y[:, 10] = 1

    # Widen the original labels with an all-zero column for the new class.
    extra_column = np.zeros((mnist_dataset.num_examples, 1))
    orig_y = np.hstack((mnist_dataset.labels, extra_column))

    all_X = np.vstack((mnist_dataset.images, noisy_X))
    all_y = np.vstack((orig_y, noisy_y))
    final_dataset = Dataset.from_tensor_slices((all_X, all_y))

    assert final_dataset.output_shapes[0] == (784, )
    assert final_dataset.output_shapes[1] == (11, )
    return final_dataset
def __init__(self, txt_file, batch_size, num_classes, image_size,
             buffer_scale=100):
    """Create a repeated, shuffled and batched image dataset.

    Args:
        txt_file: Text list file; each entry is stored as ``imagename id``.
        batch_size: Number of samples per batch.
        num_classes: Number of classes in the dataset.
        image_size: Stored on the instance as ``self.image_size``.
        buffer_scale: The shuffle buffer holds ``batch_size * buffer_scale``
            samples.
    """
    self.image_size = image_size
    self.batch_size = batch_size
    self.txt_file = txt_file  # txt list file, stored as: imagename id
    self.num_classes = num_classes
    buffer_size = batch_size * buffer_scale

    # Read the image list.
    self.read_txt_file()
    self.dataset_size = len(self.labels)
    # Single-argument print calls produce the same text on Python 2 and 3;
    # the previous print statements were a SyntaxError on Python 3.
    print("num of train datas= %s" % (self.dataset_size,))

    # Convert to tensors.
    self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
    self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

    # Create the dataset.
    data = Dataset.from_tensor_slices((self.img_paths, self.labels))
    print("data type= %s" % (type(data),))
    data = data.map(self.parse_function)
    data = data.repeat(1000)
    data = data.shuffle(buffer_size=buffer_size)

    # Store the batched dataset on the instance.
    self.data = data.batch(batch_size)
    print("self.data type= %s" % (type(self.data),))
def do_with_placeholder():
    """Train a linear model by feeding iterator batches via placeholders.

    Reads the module-level ``data`` and ``label``, batches them by three,
    fetches each batch as numpy arrays and feeds it to the model. Prints
    ``W`` before training and after each of two steps, then prints two
    further batches.
    """
    global data, label

    batched = Dataset.from_tensor_slices((data, label)).batch(3)
    iterator = batched.make_initializable_iterator()
    next_batch = iterator.get_next()

    X = tf.placeholder(tf.float32, shape=[None, 2])
    y_ = tf.placeholder(tf.float32, shape=[None, 1])
    W = tf.Variable([[0], [0]], dtype=tf.float32)
    b = tf.Variable([0], dtype=tf.float32)
    y = tf.matmul(X, W) + b
    loss = tf.losses.mean_squared_error(y_, y)
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

    with tf.Session() as sess:
        sess.run(iterator.initializer)
        sess.run(tf.global_variables_initializer())

        full_feed = {X: data, y_: label}
        print(sess.run(W, feed_dict=full_feed))
        for _ in range(2):
            # Pull one batch out of the pipeline, then feed it back in.
            batch_X_np, batch_y_np = sess.run(next_batch)
            sess.run(train_step, feed_dict={X: batch_X_np, y_: batch_y_np})
            print(sess.run(W, feed_dict=full_feed))

        for _ in range(2):
            print(sess.run(next_batch))
def __create_internal_dataset(self, load_all_data: bool):
    """Create the train, test and validation datasets.

    The samples are split into three consecutive index ranges according to
    ``self.split_fraction`` (ids 0, 1, 2 are train, test, validation).

    Args:
        load_all_data: When true, every image is loaded eagerly through
            ``__parse_image_load`` and the dataset is built from the loaded
            arrays. Otherwise the dataset holds file names and images are
            parsed lazily through ``__parse_image``.
    """
    split_names = {0: "TRAIN", 1: "TEST", 2: "VALIDAT."}
    cumulative_fraction = 0.0

    for dataset_id in range(3):
        fraction = self.split_fraction[dataset_id]
        min_index = int(np.floor(cumulative_fraction * self.data_size))
        max_index = int(
            np.floor((cumulative_fraction + fraction) * self.data_size))
        cumulative_fraction += fraction

        if load_all_data:
            split_name = split_names[dataset_id]
            # range(min_index, max_index) holds exactly this many images;
            # the previous "- 1" under-reported the total by one.
            num_images = max_index - min_index
            print("Loading {} images for {} dataset.".format(
                num_images, split_name))

            images = []
            labels = []
            for loaded, image_index in enumerate(
                    range(min_index, max_index), start=1):
                im, l = self.__parse_image_load(
                    self.__image_file_names[image_index],
                    self.__labels[image_index])
                images.append(im)
                labels.append(l)
                if loaded % 100 == 0:
                    print("Loaded {} images of {}".format(
                        loaded, num_images))
            print("Loaded all {} images".format(split_name))

            images = np.array(images)
            if not self.rgb:
                # Add a trailing axis to the non-RGB image array.
                images = images[..., np.newaxis]
            print("Images shape: {}".format(images.shape))

            images = convert_to_tensor(images, dtypes.float32)
            labels = convert_to_tensor(labels, dtypes.int32)
        else:
            images = convert_to_tensor(
                self.__image_file_names[min_index:max_index],
                dtypes.string)
            labels = convert_to_tensor(self.__labels[min_index:max_index],
                                       dtypes.int32)

        data = Dataset.from_tensor_slices((images, labels))
        if not load_all_data:
            data = data.map(self.__parse_image)

        # Create a new dataset with batches of images
        data = data.batch(self.batch_size)

        if dataset_id == 0:
            self.__train_dataset = data
        elif dataset_id == 1:
            self.__test_dataset = data
        else:
            self.__validation_dataset = data
def batch_training(X, Y, M, batch_size, n_epochs):
    """Batch training queue convenience function.

    Slices ``X``, ``Y`` and ``M`` together, shuffles with a 1000-element
    buffer seeded by ``RSEED``, repeats for ``n_epochs`` and batches.

    Returns:
        The ``(X, Y, M)`` batch tensors of a one-shot iterator.
    """
    data_tr = Dataset.from_tensor_slices({'X': X, 'Y': Y, 'M': M})
    data_tr = data_tr.shuffle(buffer_size=1000, seed=RSEED)
    data_tr = data_tr.repeat(n_epochs)
    data_tr = data_tr.batch(batch_size)

    batch = data_tr.make_one_shot_iterator().get_next()
    return batch['X'], batch['Y'], batch['M']
def dataset_synthetic(self):
    """Create the dummy dataset that triggers synthetic data generation.

    This does not generate synthetic data itself. It builds one all-zero
    sequence of maximum length per class, together with its length and its
    class label, and passes the zipped dataset to
    ``self.repeat_and_shuffle``.
    """
    nc = self.hparams.num_classes
    max_len = self.hparams.max_seq_len

    zero_seqs = tf.zeros(shape=[nc, max_len], dtype=tf.int32)
    full_lens = tf.ones(shape=[nc, 1], dtype=tf.int32) * max_len
    class_ids = tf.constant(np.arange(nc), dtype=tf.int32)
    class_ids = tf.reshape(class_ids, [nc, 1])

    seqs = Dataset.from_tensor_slices(zero_seqs)
    seq_lens = Dataset.from_tensor_slices(full_lens)
    labels = Dataset.from_tensor_slices(class_ids)

    zipped = Dataset.zip((seqs, seq_lens, labels))
    return self.repeat_and_shuffle(zipped)
def dataset_to_inputs(data, labels, batch_size):
    """Returns tuple (input_tf_node, labels_tf_node, iterator).

    The data is shuffled with a 10000-element buffer and batched; the
    returned iterator still has to be initialized by the caller.
    """
    pipeline = (
        Dataset.from_tensor_slices({'x': data, 'y': labels})
        .shuffle(buffer_size=10000)
        .batch(batch_size))
    iterator = pipeline.make_initializable_iterator()
    sample = iterator.get_next()
    return sample['x'], sample['y'], iterator
def encode(samples, n_repeat):
    """Turn (question, answer) digit strings into a one-shot iterator.

    Each element of a question or answer is converted with ``int``. The
    pairs are shuffled with a buffer of ``self.batch_size * 10``, repeated
    ``n_repeat`` times and batched by ``self.batch_size`` (``self`` comes
    from the enclosing scope).
    """
    def parse(x):
        """Convert every element of ``x`` to an int."""
        return [int(ch) for ch in x]

    encoded = [(parse(question), parse(answer))
               for question, answer in samples]
    questions, answers = zip(*encoded)
    q = np.array(questions, np.int32)
    a = np.array(answers, np.int32)

    pipeline = Dataset.from_tensor_slices((q, a))
    pipeline = pipeline.shuffle(self.batch_size * 10)
    pipeline = pipeline.repeat(n_repeat)
    pipeline = pipeline.batch(self.batch_size)
    return pipeline.make_one_shot_iterator()
def _get_feed(self, attrname, epochs=1):
    """Build the input pipeline for one data attribute.

    Args:
        attrname: Name of the attribute on ``self`` holding the records;
            each record is unpacked into two strings.
        epochs: Number of times the batched dataset is repeated.

    Returns:
        The string handle of a one-shot iterator over the shuffled,
        parsed and batched dataset.
    """
    data = getattr(self, attrname)
    # Transpose the records into two parallel string tensors.
    i, d = [tf.convert_to_tensor(x, tf.string) for x in list(zip(*data))]
    tfdataset = TFDataset.from_tensor_slices((i, d))
    # Shuffle across all records. The buffer was previously len(data[0]),
    # i.e. the width of one record (2), which left the order almost
    # unchanged.
    tfdataset = tfdataset.shuffle(buffer_size=len(data))
    tfdataset = tfdataset.map(self._parse_images,
                              num_threads=self.workers,
                              output_buffer_size=1000)
    tfdataset = tfdataset.batch(self.batchsize)
    tfdataset = tfdataset.repeat(epochs)
    iterator = tfdataset.make_one_shot_iterator()
    return iterator.string_handle()
def get_dataset(features, targets=None, shuffle=True, n_epochs=1,
                batch_size=None):
    """Build a float32 dataset from features and optional targets.

    Args:
        features: Array of features; ``features.shape[0]`` is used as the
            batch size when ``batch_size`` is None.
        targets: Optional targets sliced together with the features.
        shuffle: Whether to shuffle (buffer of 64, seed 27).
        n_epochs: The dataset is repeated only when this exceeds one.
        batch_size: Batch size, or None for a single full batch.

    Returns:
        The batched dataset.
    """
    feature_tensor = tf.constant(features, tf.float32)
    if targets is None:
        tensors = feature_tensor
    else:
        tensors = (feature_tensor, tf.constant(targets, tf.float32))
    dataset = Dataset.from_tensor_slices(tensors)

    if shuffle:
        dataset = dataset.shuffle(buffer_size=64, seed=27)
    if n_epochs > 1:
        dataset = dataset.repeat(n_epochs)

    effective_batch = features.shape[0] if batch_size is None else batch_size
    return dataset.batch(effective_batch)
def create_dataset(batch_size):
    """Build a batched dataset of parsed files with one-hot labels.

    File names and labels come from ``list_files_and_labels``; labels are
    one-hot encoded with depth 10. Parsing is interleaved over ten
    single-element datasets at a time. No shuffling is applied.
    """
    files, labels = list_files_and_labels()
    files_const = tf.constant(files)
    labels_const = tf.one_hot(tf.constant(labels), depth=10)

    def parse_single(filename, label):
        """Wrap one (filename, label) pair in a dataset and parse it."""
        single = Dataset.from_tensors((filename, label))
        return single.map(_parse_function, num_threads=1)

    dataset = Dataset.from_tensor_slices((files_const, labels_const))
    dataset = dataset.interleave(parse_single, cycle_length=10)
    return dataset.batch(batch_size)
def sub_input_fn():
    """Return ``({"x": features}, labels)`` for the premade estimators.

    Uses ``imgs``, ``labels``, ``preprocessor``, ``batch_size``,
    ``dataset_length`` and ``is_training`` from the enclosing scope.
    """
    raw = Dataset.from_tensor_slices((imgs, labels))

    # Pre-process dataset into correct form/batching/shuffle etc.
    prepared = preprocessor(raw, batch_size, dataset_length, is_training)

    # Build iterator and fetch the next element.
    next_element = prepared.make_one_shot_iterator().get_next()
    batch_features = next_element[0]
    batch_labels = next_element[1]

    # Return in a dict so the premade estimators can use it.
    return {"x": batch_features}, batch_labels
def __init__(self, txt_file, mode, batch_size, num_classes, shuffle=True,
             buffer_size=1000):
    """Create a new ImageDataGenerator

    Args:
        txt_file: Text file read by ``self._read_txt_file``.
        mode: ``"training"`` or ``"inference"``; selects the parse function.
        batch_size: Number of images batch.
        num_classes: Number of classes in the dataset.
        shuffle: Whether or not to shuffle the data in the dataset
            and the initial file list.
        buffer_size: Buffer size used by the dataset shuffle.

    Raises:
        ValueError: If an invalid mode is passed
    """
    self.txt_file = txt_file
    self.num_classes = num_classes

    self._read_txt_file()

    # Number of samples in the dataset
    self.data_size = len(self.labels)

    # Initial shuffling of the file and label lists
    if shuffle:
        self._shuffle_lists()

    self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
    self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

    # Create dataset
    data = Dataset.from_tensor_slices((self.img_paths, self.labels))

    if mode == "training":
        parse_fn = self._parse_function_train
    elif mode == "inference":
        parse_fn = self._parse_function_inference
    else:
        # The message previously said "Invalid model", which misnamed the
        # offending argument.
        raise ValueError("Invalid mode '%s'." % (mode,))
    data = data.map(parse_fn, num_threads=8,
                    output_buffer_size=100 * batch_size)

    if shuffle:
        data = data.shuffle(buffer_size=buffer_size)

    self.data = data.batch(batch_size)
def input_fn():
    """Build a dataset of decoded 224x224 float images on the CPU.

    Uses ``filenames`` from the enclosing scope.

    Returns:
        A tuple ``(image_dataset, None)``.
    """
    with tf.variable_scope("input_fn"), tf.device("/cpu:0"):
        def decode_image(filename):
            """Read a JPEG file and return it as a resized float image."""
            raw_bytes = tf.read_file(filename)
            decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
            resized = tf.image.resize_images(decoded, [224, 224])
            return tf.to_float(resized)

        filename_dataset = Dataset.from_tensor_slices(list(filenames))
        image_dataset = filename_dataset.map(decode_image)
        return image_dataset, None
def main(argv=None):
    '''Train a convolutional VAE on MNIST from a Dataset iterator.

    The training data is served by a one-shot Dataset iterator, the model
    is replicated over the available GPUs, and two plots are written:
    ``vae_scatter.ps`` (latent space of the test digits) and
    ``vae_digit.ps`` (a 15x15 manifold of generated digits).

    At call time this docstring is replaced by the module docstring, which
    is used as the CLI description.

    Args:
        argv: Optional extra command-line arguments; when given they are
            appended to ``sys.argv`` before the CLI parser runs.
    '''
    main.__doc__ = __doc__
    # list.extend returns None, so it must not be used as an expression
    # value; extend sys.argv as a statement instead.
    if argv is not None:
        sys.argv.extend(argv)
    desc = main.__doc__  # .format(os.path.basename(__file__))
    # CLI parser
    args = parser_(desc)
    mgpu = 1 if getattr(args, 'mgpu', None) is None else args.mgpu

    # input image dimensions
    img_rows, img_cols, img_chns = 28, 28, 1
    # number of convolutional filters to use
    filters = 64
    # convolution kernel size
    num_conv = 3

    gpus_list = get_available_gpus(mgpu)
    ngpus = len(gpus_list)

    batch_size = 128 * ngpus
    if K.image_data_format() == 'channels_first':
        original_img_size = (img_chns, img_rows, img_cols)
    else:
        original_img_size = (img_rows, img_cols, img_chns)
    latent_dim = 2
    intermediate_dim = 128
    epsilon_std = 1.0
    epochs = args.epochs  # 5

    # train the VAE on MNIST digits
    (x_train, _), (x_test, y_test) = mnist.load_data()

    x_train = x_train.astype('float32') / 255.
    x_train = x_train.reshape((x_train.shape[0],) + original_img_size)
    x_test = x_test.astype('float32') / 255.
    x_test = x_test.reshape((x_test.shape[0],) + original_img_size)

    print('x_train.shape:', x_train.shape)

    train_samples = x_train.shape[0]
    # Ceiling division: round(x + 0.5) over-counts (or depends on the
    # rounding mode) when the sample count is an exact multiple.
    steps_per_epoch = int(np.ceil(float(train_samples) / batch_size))

    # Create the dataset and its associated one-shot iterator.
    buffer_size = 10000
    dataset = Dataset.from_tensor_slices(x_train)
    dataset = dataset.repeat()
    dataset = dataset.shuffle(buffer_size)
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    x_train_batch = iterator.get_next()

    ldict = make_shared_layers_dict(
        img_chns, img_rows, img_cols, batch_size, filters, num_conv,
        intermediate_dim, latent_dim, epsilon_std)
    # ldict is a dictionary that holds all layers. Since these layers are
    # instantiated once, they are shared amongs vae, encoder, and generator.

    x = Input(tensor=x_train_batch)
    vae_serial = make_vae(ldict, x)
    # : :type vae: Model
    vae = make_parallel(vae_serial, gpus_list)

    lr = 0.001 * ngpus
    opt = RMSprop(lr)  # 'rmsprop'
    # opt = tf.train.RMSPropOptimizer(lr)
    # opt = TFOptimizer(opt)

    vae.compile(optimizer=opt, loss=None)
    # vae.summary()
    print_mgpu_modelsummary(vae)

    callbacks = [BatchTiming(), SamplesPerSec(batch_size)]

    # Fit the model using data from the TF data tensors.
    vae.fit(steps_per_epoch=steps_per_epoch, epochs=epochs,
            callbacks=callbacks)

    x = Input(shape=original_img_size)
    vae_val = make_vae(ldict, x)
    vae_val.compile(optimizer=opt, loss=None)
    loss = vae_val.evaluate(x=x_test, y=None, batch_size=batch_size // ngpus)
    print('\n\nVAE VALIDATION LOSS: {}'.format(loss))

    x = Input(shape=original_img_size)
    z_mean, _ = get_encoded(ldict, x)
    encoder = Model(x, z_mean)
    # : :type encoder: Model

    decoder_input = Input(shape=(latent_dim,))
    x_decoded_mean_squash = get_decoded(ldict, decoder_input)
    generator = Model(decoder_input, x_decoded_mean_squash)
    # : :type generator: Model

    # display a 2D plot of the digit classes in the latent space
    x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
    plt.figure(figsize=(6, 6))
    plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
    plt.colorbar()
    # plt.show()
    plt.savefig('vae_scatter.ps')
    plt.close()

    # display a 2D manifold of the digits
    n = 15  # figure with 15x15 digits
    digit_size = 28
    figure = np.zeros((digit_size * n, digit_size * n))
    # Linearly spaced coordinates on the unit square were transformed through
    # the inverse CDF (ppf) of the Gaussian
    # To produce values of the latent variables z, since the prior of the
    # latent space is Gaussian
    grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
    grid_y = norm.ppf(np.linspace(0.05, 0.95, n))

    for i, yi in enumerate(grid_x):
        for j, xi in enumerate(grid_y):
            z_sample = np.array([[xi, yi]])
            # The generator is fed a full batch of identical latent points;
            # only the first decoded digit is kept.
            z_sample = np.tile(z_sample, batch_size).reshape(batch_size, 2)
            x_decoded = generator.predict(z_sample, batch_size=batch_size)
            digit = x_decoded[0].reshape(digit_size, digit_size)
            figure[i * digit_size: (i + 1) * digit_size,
                   j * digit_size: (j + 1) * digit_size] = digit

    plt.figure(figsize=(10, 10))
    plt.imshow(figure, cmap='Greys_r')
    # plt.show()
    plt.savefig('vae_digit.ps')
    plt.close()
def __init__(self, txt_file, mode, batch_size, num_classes, shuffle=True,
             buffer_size=1000):
    """Create a new ImageDataGenerator.

    Receives a path string to a text file, which consists of many lines,
    where each line has first a path string to an image and, separated by
    a space, an integer referring to the class number. Using this data,
    this class will create TensorFlow datasets that can be used to train
    e.g. a convolutional neural network.

    Args:
        txt_file: Path to the text file.
        mode: Either 'training' or 'inference'. Depending on this value,
            different parsing functions will be used.
        batch_size: Number of images per batch.
        num_classes: Number of classes in the dataset.
        shuffle: Whether or not to shuffle the data in the dataset and the
            initial file list.
        buffer_size: Number of images used as buffer for TensorFlow's
            shuffling of the dataset.

    Raises:
        ValueError: If an invalid mode is passed.
    """
    self.txt_file, self.num_classes = txt_file, num_classes

    # Load the image paths and labels from the text file.
    self._read_txt_file()

    # Number of samples in the dataset.
    self.data_size = len(self.labels)

    # Shuffle the path and label lists together before building tensors.
    if shuffle:
        self._shuffle_lists()

    # Turn the Python lists into TF tensors.
    path_tensor = convert_to_tensor(self.img_paths, dtype=dtypes.string)
    self.img_paths = path_tensor
    label_tensor = convert_to_tensor(self.labels, dtype=dtypes.int32)
    self.labels = label_tensor

    samples = Dataset.from_tensor_slices((self.img_paths, self.labels))

    # The parse function differs between training and inference.
    if mode == 'training':
        parse_fn = self._parse_function_train
    elif mode == 'inference':
        parse_fn = self._parse_function_inference
    else:
        raise ValueError("Invalid mode '%s'." % (mode))
    samples = samples.map(parse_fn, num_threads=8,
                          output_buffer_size=100 * batch_size)

    # Shuffle within a window of `buffer_size` elements.
    if shuffle:
        samples = samples.shuffle(buffer_size=buffer_size)

    # Group the parsed images into batches.
    self.data = samples.batch(batch_size)
import numpy as np
# The script calls tf.placeholder, tf.layers, tf.losses, tf.train and
# tf.Session, so the top-level package has to be imported as well.
import tensorflow as tf
from tensorflow.contrib.data import Dataset

# load your data or create your data in here
npx = np.random.uniform(-1, 1, (1000, 1))                           # x data
npy = np.power(npx, 2) + np.random.normal(0, 0.1, size=npx.shape)   # y data
npx_train, npx_test = np.split(npx, [800])      # training and test data
npy_train, npy_test = np.split(npy, [800])

# use placeholder, later you may need different data, pass the different data into placeholder
tfx = tf.placeholder(npx_train.dtype, npx_train.shape)
tfy = tf.placeholder(npy_train.dtype, npy_train.shape)

# create dataloader
dataset = Dataset.from_tensor_slices((tfx, tfy))
dataset = dataset.shuffle(buffer_size=1000)     # choose data randomly from this buffer
dataset = dataset.batch(32)                     # batch size you will use
dataset = dataset.repeat(3)                     # repeat for 3 epochs
iterator = dataset.make_initializable_iterator()  # later we have to initialize this one

# your network
bx, by = iterator.get_next()                    # use batch to update
l1 = tf.layers.dense(bx, 10, tf.nn.relu)
out = tf.layers.dense(l1, npy.shape[1])
loss = tf.losses.mean_squared_error(by, out)
train = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

sess = tf.Session()
# need to initialize the iterator in this case
sess.run([iterator.initializer, tf.global_variables_initializer()],
         feed_dict={tfx: npx_train, tfy: npy_train})
return predictions batch_size = 128 buffer_size = 10000 steps_per_epoch = int(np.ceil(60000 / float(batch_size))) # = 469 epochs = 5 num_classes = 10 (x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = x_train.astype(np.float32) / 255 x_train = np.expand_dims(x_train, -1) y_train = tf.one_hot(y_train, num_classes) # Create the dataset and its associated one-shot iterator. dataset = Dataset.from_tensor_slices((x_train, y_train)) dataset = dataset.repeat() dataset = dataset.shuffle(buffer_size) dataset = dataset.batch(batch_size) iterator = dataset.make_one_shot_iterator() # Model creation using tensors from the get_next() graph node. inputs, targets = iterator.get_next() model_input = layers.Input(tensor=inputs) model_output = cnn_layers(model_input) train_model = keras.models.Model(inputs=model_input, outputs=model_output) train_model.compile(optimizer=keras.optimizers.RMSprop(lr=2e-3, decay=1e-5), loss='categorical_crossentropy', metrics=['accuracy'], target_tensors=[targets])
def main(argv=None):
    '''Train a convolutional VAE on MNIST with Horovod.

    Each process pins itself to one GPU, trains from a one-shot Dataset
    iterator with a Horovod distributed optimizer, and rank 0 evaluates the
    model and writes ``vae_scatter.ps`` and ``vae_digit.ps``.

    At call time this docstring is replaced by the module docstring, which
    is used as the CLI description.

    Args:
        argv: Optional extra command-line arguments; when given they are
            appended to ``sys.argv`` before the CLI parser runs.
    '''
    main.__doc__ = __doc__
    # list.extend returns None, so it must not be used as an expression
    # value; extend sys.argv as a statement instead.
    if argv is not None:
        sys.argv.extend(argv)
    desc = main.__doc__  # .format(os.path.basename(__file__))
    # CLI parser
    args = parser_(desc)

    nranks_per_gpu = args.nranks_per_gpu
    local_rank = hvd.local_rank()
    gpu_local_rank = local_rank // nranks_per_gpu
    print('local_rank, GPU_LOCAL_RANK: {}, {}'.format(
        local_rank, gpu_local_rank))

    # Pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.gpu_options.visible_device_list = str(hvd.local_rank())
    config.gpu_options.visible_device_list = str(gpu_local_rank)
    K.set_session(tf.Session(config=config))

    # input image dimensions
    img_rows, img_cols, img_chns = 28, 28, 1
    # number of convolutional filters to use
    filters = 64
    # convolution kernel size
    num_conv = 3

    hvdsize = hvd.size()

    batch_size = 128  # 100
    if K.image_data_format() == 'channels_first':
        original_img_size = (img_chns, img_rows, img_cols)
    else:
        original_img_size = (img_rows, img_cols, img_chns)
    latent_dim = 2
    intermediate_dim = 128
    epsilon_std = 1.0
    epochs = args.epochs  # 5

    # train the VAE on MNIST digits
    (x_train, _), (x_test, y_test) = mnist.load_data()

    x_train = x_train.astype('float32') / 255.
    x_train = x_train.reshape((x_train.shape[0],) + original_img_size)
    x_test = x_test.astype('float32') / 255.
    x_test = x_test.reshape((x_test.shape[0],) + original_img_size)

    if hvd.rank() == 0:
        print('x_train.shape:', x_train.shape)

    train_samples = x_train.shape[0]
    # steps_per_epoch = train_samples // batch_size // hvdsize
    speedupopt = args.speedup
    if speedupopt == SpeedupOpts.imgspersec:
        steps_per_epoch = train_samples // batch_size
    else:
        # Ceiling division: round(x + 0.5) over-counts (or depends on the
        # rounding mode) when the sample count is an exact multiple.
        steps_per_epoch = int(np.ceil(
            float(train_samples) / batch_size / hvdsize))

    # Create the dataset and its associated one-shot iterator.
    buffer_size = 10000
    dataset = Dataset.from_tensor_slices(x_train)
    dataset = dataset.repeat()
    dataset = dataset.shuffle(buffer_size)
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    x_train_batch = iterator.get_next()

    ldict = make_shared_layers_dict(
        img_chns, img_rows, img_cols, batch_size, filters, num_conv,
        intermediate_dim, latent_dim, epsilon_std)
    # ldict is a dictionary that holds all layers. Since these layers are
    # instantiated once, they are shared amongs vae, encoder, and generator.

    x = Input(tensor=x_train_batch)
    vae = make_vae(ldict, x)
    # : :type vae: Model

    lr = 0.001  # * hvdsize
    opt = tf.train.RMSPropOptimizer(lr)
    # Add Horovod Distributed Optimizer.
    opt = hvd.DistributedOptimizer(opt)  # , use_locking=True)
    opt = TFOptimizer(opt)

    # opt = RMSprop(lr)
    # Add Horovod Distributed Optimizer.
    # opt = hvd_keras.DistributedOptimizer(opt)  # , use_locking=True)

    vae.compile(optimizer=opt, loss=None)
    if hvd.rank() == 0:
        vae.summary()

    callbacks = []
    if hvd.rank() == 0:
        callbacks += [BatchTiming(), SamplesPerSec(batch_size * hvdsize)]

    sess = K.get_session()
    sess.run(hvd.broadcast_global_variables(0))

    # Fit the model using data from the TF data tensors.
    vae.fit(steps_per_epoch=steps_per_epoch, epochs=epochs,
            callbacks=callbacks)

    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source; the evaluation and plotting are assumed
    # to run on rank 0 only and K.clear_session() on every rank - confirm.
    if hvd.rank() == 0:
        x = Input(shape=original_img_size)
        vae_val = make_vae(ldict, x)
        vae_val.compile(optimizer=opt, loss=None)
        loss = vae_val.evaluate(x=x_test, y=None, batch_size=batch_size)
        print('\n\nVAE VALIDATION LOSS: {}'.format(loss))

        x = Input(shape=original_img_size)
        z_mean, _ = get_encoded(ldict, x)
        encoder = Model(x, z_mean)
        # : :type encoder: Model

        decoder_input = Input(shape=(latent_dim,))
        x_decoded_mean_squash = get_decoded(ldict, decoder_input)
        generator = Model(decoder_input, x_decoded_mean_squash)
        # : :type generator: Model

        # display a 2D plot of the digit classes in the latent space
        x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
        plt.figure(figsize=(6, 6))
        plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
        plt.colorbar()
        # plt.show()
        plt.savefig('vae_scatter.ps')
        plt.close()

        # display a 2D manifold of the digits
        n = 15  # figure with 15x15 digits
        digit_size = 28
        figure = np.zeros((digit_size * n, digit_size * n))
        # Linearly spaced coordinates on the unit square were transformed
        # through the inverse CDF (ppf) of the Gaussian
        # To produce values of the latent variables z, since the prior of the
        # latent space is Gaussian
        grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
        grid_y = norm.ppf(np.linspace(0.05, 0.95, n))

        for i, yi in enumerate(grid_x):
            for j, xi in enumerate(grid_y):
                z_sample = np.array([[xi, yi]])
                # The generator is fed a full batch of identical latent
                # points; only the first decoded digit is kept.
                z_sample = np.tile(z_sample, batch_size).reshape(
                    batch_size, 2)
                x_decoded = generator.predict(z_sample,
                                              batch_size=batch_size)
                digit = x_decoded[0].reshape(digit_size, digit_size)
                figure[i * digit_size: (i + 1) * digit_size,
                       j * digit_size: (j + 1) * digit_size] = digit

        plt.figure(figsize=(10, 10))
        plt.imshow(figure, cmap='Greys_r')
        # plt.show()
        plt.savefig('vae_digit.ps')
        plt.close()

    K.clear_session()