Esempio n. 1
0
File: data.py Progetto: tongda/ImSAT
        def input_fn():
            """Assemble the image dataset and the caption datasets.

            Returns:
                A pair ``(image_dataset, caption_structure)``.
                ``caption_structure`` maps ``"raw"`` to the caption dataset
                and ``"index"`` to the dataset of int32 index encodings.
            """
            caption_dataset = Dataset.from_tensor_slices(list(caps))
            filename_dataset = Dataset.from_tensor_slices(filenames)

            def my_split(text):
                """Encode one caption as indices framed by <START>/<END>."""
                characters = text.decode("utf-8")
                # todo: take care of the unknown character.
                # Characters absent from the vocabulary fall back to index 0.
                body = [self.word_to_idx.get(ch, 0) for ch in characters]
                framed = [self.word_to_idx['<START>']] + body
                framed.append(self.word_to_idx['<END>'])
                return np.array(framed, dtype=np.int32)

            def encode_caption(text):
                # todo: tf has issue with `tf.string_split` with unicode
                #   https://github.com/tensorflow/tensorflow/issues/11399
                #   so `py_func` is used here.
                return tf.py_func(my_split, [text], tf.int32)

            index_dataset = caption_dataset.map(encode_caption, num_threads=8)

            decode_image = get_decode_image_fn(is_training=is_distort)
            image_dataset = filename_dataset.map(decode_image, num_threads=8)

            return image_dataset, {
                "raw": caption_dataset,
                "index": index_dataset,
            }
Esempio n. 2
0
def mnist():
    """Load MNIST and wrap the train and test splits as Datasets.

    Returns:
        A tuple ``(train_dataset, test_dataset)``; each is built from the
        ``(images, labels)`` arrays of its split, with labels loaded
        one-hot (``one_hot=True``).
    """
    # The local name differs from the function name so it is not shadowed.
    mnist_data = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Use the public `images` / `labels` accessors instead of reaching
    # into the private `_images` / `_labels` fields.
    train_dataset = Dataset.from_tensor_slices(
        (mnist_data.train.images, mnist_data.train.labels))
    test_dataset = Dataset.from_tensor_slices(
        (mnist_data.test.images, mnist_data.test.labels))

    return train_dataset, test_dataset
def getimage(image, batch_size, trainnum=2000, testnum=500):
    """Build training and test data for the requested image collection.

    Args:
        image: Name of the collection, either ``'FID'`` or ``'cifar10'``.
        batch_size: Batch size handed to the data pipeline.
        trainnum: For ``'cifar10'``, number of leading training samples kept
            (at most the number available).
        testnum: For ``'cifar10'``, number of leading test samples kept
            (at most the number available).

    Returns:
        ``(tr_data, test_data, classnum)`` for a supported collection, or
        ``None`` when ``image`` names neither collection.
    """
    if image == 'FID':
        holdout = 5  # images per class reserved for testing
        train_image = []
        train_label = []
        test_image = []
        test_label = []
        classnum = 0
        for dirpath, dirnames, _ in os.walk(r'D:\360download\FIDS30'):
            # Only leaf directories (no sub-directories) hold a class.
            if dirnames:
                continue

            imagepath = glob.glob(os.path.join(dirpath, '*.jpg'))

            # The last five images are the test data, the rest is training
            # data. Slicing keeps the two sets disjoint; the previous
            # `imagepath[i - 6]` indexing reused one training image for
            # testing and never used the final image.
            training_paths = imagepath[:-holdout]
            testing_paths = imagepath[-holdout:]

            train_image.extend(training_paths)
            train_label.extend([classnum] * len(training_paths))
            test_image.extend(testing_paths)
            test_label.extend([classnum] * len(testing_paths))
            classnum = classnum + 1

        # Hand the training paths and labels to the image generator.
        tr_data = ImageDataGenerator(images=train_image,
                                     labels=train_label,
                                     batch_size=batch_size,
                                     num_classes=classnum)

        # Same for the test paths and labels, without shuffling.
        test_data = ImageDataGenerator(images=test_image,
                                       labels=test_label,
                                       batch_size=batch_size,
                                       num_classes=classnum,
                                       shuffle=False)
        return tr_data.data, test_data.data, classnum

    if image == 'cifar10':
        cifar10_dir = 'cifar-10-batches-py'
        # Load the CIFAR data.
        X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
        train_image = X_train[:trainnum]
        train_label = y_train[:trainnum]
        test_image = X_test[:testnum]
        test_label = y_test[:testnum]
        classnum = 10

        tr_data = Dataset.from_tensor_slices((train_image, train_label))
        tr_data = tr_data.map(resize)
        tr_data = tr_data.batch(batch_size)

        test_data = Dataset.from_tensor_slices((test_image, test_label))
        test_data = test_data.map(resize)
        test_data = test_data.batch(batch_size)
        return tr_data, test_data, classnum

    # Unknown collection names keep yielding None, as before.
    return None
def configure_dataset():
    """Create a shuffled, batched stream of training image paths.

    The ``*.jpg`` paths under ``<FLAGS.dataset_path>/ch4_training_images``
    are pushed one at a time into a ``tf.RandomShuffleQueue`` by a queue
    runner that is registered with ``tf.train.add_queue_runner``.

    :returns: the op that dequeues ``FLAGS.batch_size`` paths from the queue.
    """
    log = get_logger()

    pattern = os.path.join(FLAGS.dataset_path, "ch4_training_images", "*.jpg")
    path_tensor = tf.constant(glob.glob(pattern))
    log.debug("image_list_op: {}".format(path_tensor))

    path_dataset = Dataset.from_tensor_slices(path_tensor)
    next_path = path_dataset.make_one_shot_iterator().get_next()

    #: Random shuffle queue sized for 20 entries, 18 kept after a dequeue.
    capacity = 20
    shuffle_queue = tf.RandomShuffleQueue(
        capacity=capacity,
        min_after_dequeue=int(0.9 * capacity),
        shapes=next_path.shape,
        dtypes=next_path.dtype)

    #: Op that enqueues a single path.
    enqueue_op = shuffle_queue.enqueue(next_path)

    #: Queue runner with two copies of the enqueue op.
    runner = tf.train.QueueRunner(shuffle_queue, [enqueue_op] * 2)
    tf.train.add_queue_runner(runner)

    return shuffle_queue.dequeue_many(FLAGS.batch_size)
Esempio n. 5
0
def do_without_placeholder():
    """Run two gradient steps fed straight from the dataset iterator.

    Reads the module-level ``data`` and ``label``, batches them by three and
    wires the iterator output directly into a linear model. ``W`` is printed
    before and after each of the two training steps, then two further
    batches of inputs are printed.
    """
    batches = Dataset.from_tensor_slices((data, label)).batch(3)
    iterator = batches.make_initializable_iterator()
    batch_X, batch_y = iterator.get_next()

    W = tf.Variable([[0], [0]], dtype=tf.float32)
    b = tf.Variable([0], dtype=tf.float32)
    prediction = tf.matmul(batch_X, W) + b
    loss = tf.losses.mean_squared_error(batch_y, prediction)
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

    with tf.Session() as sess:
        sess.run(iterator.initializer)
        sess.run(tf.global_variables_initializer())
        print(sess.run(W))

        # Each training step pulls a fresh batch from the iterator.
        for _ in range(2):
            sess.run(train_step)
            print(sess.run(W))

        # Every evaluation of batch_X advances the iterator as well.
        for _ in range(2):
            print(sess.run(batch_X))
Esempio n. 6
0
    def __init__(self, txt_file, mode, batch_size, num_classes, shuffle=True, buffer_size=1000):
        """Build the batched image dataset described by a text file.

        Args:
            txt_file: Path of the TXT file; it stores many lines, each
                holding an image path and its class.
            mode: ``'training'`` or ``'inference'``; selects the parse
                function mapped over the dataset.
            batch_size: Number of elements per batch.
            num_classes: Number of classes; stored on the instance.
            shuffle: When true, the path/label lists are shuffled first and
                the dataset is shuffled again after parsing.
            buffer_size: Buffer size of the dataset-level shuffle.

        Raises:
            ValueError: If ``mode`` is neither ``'training'`` nor
                ``'inference'``.
        """
        self.txt_file = txt_file
        self.num_classes = num_classes
        self._read_txt_file()

        self.data_size = len(self.labels)

        if shuffle:
            self._shuffle_lists()

        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

        data = Dataset.from_tensor_slices((self.img_paths, self.labels))

        if mode == 'training':
            parse_fn = self._parse_function_train
        elif mode == 'inference':
            parse_fn = self._parse_function_inference
        else:
            # The rejected argument is the mode, so the message names it.
            raise ValueError("Invalid mode '{}'.".format(mode))

        data = data.map(parse_fn, num_threads=8,
                        output_buffer_size=100 * batch_size)

        if shuffle:
            data = data.shuffle(buffer_size=buffer_size)

        data = data.batch(batch_size)
        self.data = data
Esempio n. 7
0
    def __init__(self, txt_file, mode, batch_size = 1, shuffle=True,buffer_size=1000):
        """Read ``txt_file`` and expose a batched dataset as ``self.data``.

        Args:
            txt_file: File handed to ``_read_txt_file``.
            mode: ``'training'`` or ``'inference'``; picks the parse function.
            batch_size: Number of elements per batch.
            shuffle: Shuffle the lists up front and the parsed dataset later.
            buffer_size: Buffer size of the dataset-level shuffle.

        Raises:
            ValueError: If ``mode`` is not one of the two supported values.
        """
        self._read_txt_file(txt_file)
        self.data_size = len(self.images)

        if shuffle:
            self._shuffle_lists()

        # Both the images and the labels are turned into string tensors.
        self.images = convert_to_tensor(self.images, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.string)

        dataset = Dataset.from_tensor_slices((self.images, self.labels))

        # Pick the parsing function that matches the requested mode.
        if mode == 'training':
            parse_fn = self._parse_function_train
        elif mode == 'inference':
            parse_fn = self._parse_function_inference
        else:
            raise ValueError("Invalid mode '%s'." % (mode))

        dataset = dataset.map(parse_fn,
                              num_threads=8,
                              output_buffer_size=100 * batch_size)

        if shuffle:
            dataset = dataset.shuffle(buffer_size=buffer_size)

        # Group the parsed elements into batches.
        self.data = dataset.batch(batch_size)
Esempio n. 8
0
    def _count_num_records(self):
        """
        Counts the records read from the files matching ``data_files_pattern``
        by iterating ``dataset_class`` over them once. According to the
        original note this is called from get_size the first time.

        Only the end-of-data signal stops the count; any other error raised
        while building or reading the dataset propagates instead of being
        reported as a short count.

        :return int: the number of elements produced by the dataset
        """
        # TODO in TF 1.3 use: dataset = Dataset.list_files(self.data_files_pattern).repeat(1)
        from tensorflow.python.ops import gen_io_ops

        dataset = Dataset.from_tensor_slices(
            gen_io_ops.matching_files(self.data_files_pattern)).repeat(1)

        files = self._read_files_once(dataset)
        samples = 0
        # A throwaway graph keeps the counting ops out of the caller's graph.
        with tf.Graph().as_default():
            dataset = self.dataset_class(files).repeat(1)
            next_element = dataset.make_one_shot_iterator().get_next()
            with tf.Session() as sess:
                try:
                    while True:
                        sess.run(next_element)
                        samples += 1
                except tf.errors.OutOfRangeError:
                    # The one-shot iterator is exhausted: counting is done.
                    pass
        return samples
Esempio n. 9
0
    def train_inputs():
        """Build the training pipeline and register its initializer.

        The dataset is fed through placeholders, so its iterator has to be
        initialized inside a session; that call is stored on
        ``iterator_initializer_hook`` and feeds ``features`` / ``captions``
        from the enclosing scope.

        Returns:
            The ``(next_video, next_caption)`` tensors of the iterator.
        """
        with tf.name_scope("Train_Data"):
            video_ph = tf.placeholder(
                tf.float32, [None, FLAGS.max_video_length, FLAGS.frame_dim])
            caption_ph = tf.placeholder(tf.int32, [None, None])

            # Repeat indefinitely, shuffle, then batch.
            pipeline = Dataset.from_tensor_slices((video_ph, caption_ph))
            pipeline = pipeline.repeat(None)
            pipeline = pipeline.shuffle(buffer_size=1450)
            pipeline = pipeline.batch(FLAGS.batch_size)

            iterator = pipeline.make_initializable_iterator()
            next_video, next_caption = iterator.get_next()

            # Identity ops exist only to give the first elements a name.
            tf.identity(next_video[0], "video_0")
            tf.identity(next_caption[0], "caption_0")

            def initialize_iterator(sess):
                return sess.run(iterator.initializer,
                                feed_dict={video_ph: features,
                                           caption_ph: captions})

            # The run hook calls this to initialize the iterator.
            iterator_initializer_hook.iterator_initializer_func = \
                initialize_iterator

            return next_video, next_caption
Esempio n. 10
0
def infer():
    """Restore the checkpointed model and print a prediction per sample.

    Uses the module-level ``novel_data_X`` / ``novel_data_y`` and
    ``infer_graph``; variables are loaded from ``/tmp/model.ckpt``.
    """
    # 1. Build the model structure for inference.
    with infer_graph.as_default():
        samples = Dataset.from_tensor_slices((novel_data_X, novel_data_y))
        iterator = samples.batch(1).make_one_shot_iterator()

        _, pred = build_model(iterator, mode=ModeKeys.INFER)
        saver = tf.train.Saver(tf.global_variables())

    with tf.Session(graph=infer_graph) as sess:
        # 2. Load the model variables from the last checkpoint.
        sess.run(tf.global_variables_initializer())
        print('Original W1: {}'.format(sess.run('W1:0')))
        saver.restore(sess, '/tmp/model.ckpt')
        print('  Loaded W1: {}'.format(sess.run('W1:0')))

        # 3. Predict until the one-shot iterator runs dry.
        try:
            while True:
                print(sess.run(pred))
        except tf.errors.OutOfRangeError:
            pass

    print('Inference is DONE')
Esempio n. 11
0
  def testMultipleIteratorsOnADatasetThatUsesFunctions(self):
    """Two iterators over one mapped dataset yield the same squares."""
    squared = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])
    squared = squared.map(math_ops.square)

    def drain():
      # A fresh iterator is created for every pass over the dataset.
      return [value.numpy() for value in datasets.Iterator(squared)]

    first_pass = drain()
    self.assertAllEqual([1, 4, 9, 16, 25, 36], first_pass)
    second_pass = drain()
    self.assertAllEqual(first_pass, second_pass)
Esempio n. 12
0
def train_input_fn(features, labels, batch_size):
    """Input function for training regression models.

    Args:
        features: A dict with the name of each feature as key and the
                  respective numpy arrays as values
                  (first output of transform_fn)
        labels: A 1-dimensional numpy array containing only the price values
                for the target period (second output of transform_fn)
        batch_size: An integer value for the size of each batch

    Returns:
        The ``get_next()`` output of a one-shot iterator over the batched
        ``(features, labels)`` dataset.

    Example:
        regressor.train(input_fn=lambda: train_input_fn(feature_X, label_y, 10), steps=100)

    """
    # Slice the inputs into a Dataset of (feature dict, label) examples.
    examples = Dataset.from_tensor_slices((dict(features), labels))

    # Only batching is applied: the examples are neither shuffled nor
    # repeated here.
    batched = examples.batch(batch_size)

    # Hand back the read end of the pipeline.
    iterator = batched.make_one_shot_iterator()
    return iterator.get_next()
Esempio n. 13
0
    def __init__(self,
                 txt_file,
                 mode,
                 batch_size,
                 num_classes,
                 shuffle=True,
                 buffer_size=1000):
        """Read the sample list and expose a batched dataset as ``self.data``.

        Args:
            txt_file: File read by ``_read_txt_file``.
            mode: ``'training'`` or ``'inference'``; picks the parse function.
            batch_size: Number of elements per batch.
            num_classes: Number of classes; stored on the instance.
            shuffle: Shuffle the lists up front and the parsed dataset later.
            buffer_size: Buffer size of the dataset-level shuffle.

        Raises:
            ValueError: If ``mode`` is not one of the two supported values.
        """
        self.txt_file = txt_file
        self.num_classes = num_classes
        self._read_txt_file()
        self.data_size = len(self.labels)

        if shuffle:
            self._shuffle_lists()

        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

        dataset = Dataset.from_tensor_slices((self.img_paths, self.labels))

        # Choose the parser first so the map call is written only once.
        if mode == 'training':
            parse_fn = self._parse_function_train
        elif mode == 'inference':
            parse_fn = self._parse_function_inference
        else:
            raise ValueError("Invalid mode '%s'." % (mode))
        dataset = dataset.map(parse_fn)

        if shuffle:
            dataset = dataset.shuffle(buffer_size=buffer_size)

        self.data = dataset.batch(batch_size)
Esempio n. 14
0
def train():
    """Train the model on the module-level data and checkpoint it.

    Uses the module-level ``data_X`` / ``data_y`` and ``train_graph``; the
    variables are saved to ``/tmp/model.ckpt`` once the input is exhausted.
    """
    # 1. Build the model structure for training.
    with train_graph.as_default():
        tf.set_random_seed(1)
        pipeline = Dataset.from_tensor_slices((data_X, data_y))
        pipeline = (pipeline.shuffle(buffer_size=64, seed=1)
                    .repeat(500)
                    .batch(4))
        iterator = pipeline.make_one_shot_iterator()

        minimize_op, _ = build_model(iterator, mode=ModeKeys.TRAIN)
        saver = tf.train.Saver(tf.global_variables())

    with tf.Session(graph=train_graph) as sess:
        # 2. Gradient descent until the one-shot iterator runs dry.
        sess.run(tf.global_variables_initializer())
        try:
            while True:
                sess.run(minimize_op)
        except tf.errors.OutOfRangeError:
            pass

        # 3. Save the model variables.
        saver.save(sess, '/tmp/model.ckpt')
        print(' Trained W1: {}'.format(sess.run('W1:0')))

    print('Training is DONE')
Esempio n. 15
0
def predict(num):
    """Score the ``'predict'`` data with the saved model and average it.

    Args:
        num: Passed to ``prepare_nn_data`` and used as the divisor of the
            summed scores (presumably the number of samples; confirm
            against ``prepare_nn_data``).

    Returns:
        The sum of the first probability column over every batch, divided
        by ``num``.
    """
    data_X, data_Y = prepare_nn_data('predict', num)
    predict_graph = tf.Graph()
    # Build the model structure for prediction.
    with predict_graph.as_default():

        dataset = Dataset.from_tensor_slices((data_X, data_Y))
        dataset = dataset.map(_parse_function)
        dataset = dataset.batch(10)
        iterator = dataset.make_one_shot_iterator()

        model = Model(training=False)
        # The loss returned by the model is not needed for prediction.
        _, probs = model.build(iterator)

        # define saver
        saver = tf.train.Saver(tf.global_variables())

    # Start a session to restore the weights and predict.
    with tf.Session(graph=predict_graph) as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, 'vgg/model_file/model')

        total_score = 0.0
        # One `sess.run(probs)` only covers a single batch of 10, so keep
        # pulling batches until the one-shot iterator is exhausted;
        # otherwise the sum misses every sample past the first batch while
        # still being divided by `num`.
        while True:
            try:
                batch_probs = sess.run(probs)
            except tf.errors.OutOfRangeError:
                break
            for row in batch_probs:
                total_score += row[0]

        return total_score / num
Esempio n. 16
0
    def __init__(self,
                 images,
                 labels,
                 batch_size,
                 num_classes,
                 shuffle=True,
                 buffer_size=1000):
        """Wrap in-memory path and label lists as a batched dataset.

        Args:
            images: Image paths; converted to a string tensor.
            labels: Labels; converted to an int32 tensor.
            batch_size: Number of elements per batch.
            num_classes: Number of classes; stored on the instance.
            shuffle: When true, ``_shuffle_lists`` is called before the
                tensors are created.
            buffer_size: Not used by this constructor.
        """
        self.img_paths = images
        self.labels = labels
        self.num_classes = num_classes
        self.data_size = len(self.labels)
        self.pointer = 0

        if shuffle:
            self._shuffle_lists()

        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

        # The training parse function is always the one applied here.
        dataset = Dataset.from_tensor_slices((self.img_paths, self.labels))
        dataset = dataset.map(self._parse_function_train,
                              num_threads=8,
                              output_buffer_size=100 * batch_size)

        self.data = dataset.batch(batch_size)
Esempio n. 17
0
def gen_noise_dataset(mnist_dataset):
    """Generate a TF Dataset with additional "noisy data" as the 11th class.

    Uniform random vectors, as many as there are real examples, are stacked
    below the real images; every label is widened to 11 columns, the last
    one marking the noise class.

    :param mnist_dataset: mnist.DataSet, which has attributes images/labels
    :return: TF (API) Dataset
    """
    example_count = mnist_dataset.num_examples
    feature_count = mnist_dataset.images.shape[1]

    # Noise samples, which can (almost) never be seen as a valid digit.
    noise_images = np.random.rand(example_count, feature_count)
    noise_labels = np.zeros((example_count, 11))
    noise_labels[:, -1] = 1

    # Original labels get an extra all-zero column for the noise class.
    padding = np.zeros((mnist_dataset.num_examples, 1))
    widened_labels = np.hstack((mnist_dataset.labels, padding))

    all_images = np.vstack((mnist_dataset.images, noise_images))
    all_labels = np.vstack((widened_labels, noise_labels))
    final_dataset = Dataset.from_tensor_slices((all_images, all_labels))

    assert final_dataset.output_shapes[0] == (784, )
    assert final_dataset.output_shapes[1] == (11, )

    return final_dataset
Esempio n. 18
0
    def __init__(self, txt_file, batch_size, num_classes,
                 image_size,buffer_scale=100):
        """Build a repeated, shuffled and batched dataset from a list file.

        Args:
            txt_file: Text list file, stored as ``imagename id`` per line.
            batch_size: Number of elements per batch.
            num_classes: Number of classes; stored on the instance.
            image_size: Image size; stored on the instance.
            buffer_scale: The shuffle buffer holds
                ``batch_size * buffer_scale`` elements.
        """
        self.image_size = image_size
        self.batch_size = batch_size
        self.txt_file = txt_file  # txt list file, stored as: imagename id
        self.num_classes = num_classes
        buffer_size = batch_size * buffer_scale

        # Read the image list.
        self.read_txt_file()
        self.dataset_size = len(self.labels)
        # Single pre-formatted argument: prints the same text whether
        # `print` is the Python 2 statement or the Python 3 function.
        print("num of train datas= %s" % (self.dataset_size,))

        # Convert to tensors.
        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

        # Create the dataset.
        data = Dataset.from_tensor_slices((self.img_paths, self.labels))
        print("data type= %s" % (type(data),))
        data = data.map(self.parse_function)
        data = data.repeat(1000)
        data = data.shuffle(buffer_size=buffer_size)

        # Batch the data and keep it on the instance.
        self.data = data.batch(batch_size)
        print("self.data type= %s" % (type(self.data),))
Esempio n. 19
0
def do_with_placeholder():
    """Run two gradient steps, feeding dataset batches via placeholders.

    Reads the module-level ``data`` and ``label``. Each batch is first
    fetched as numpy values and then fed into the model's placeholders.
    ``W`` is printed before and after each of the two training steps, then
    two further batches are printed.
    """
    batches = Dataset.from_tensor_slices((data, label)).batch(3)
    iterator = batches.make_initializable_iterator()
    next_batch = iterator.get_next()

    X = tf.placeholder(tf.float32, shape=[None, 2])
    y_ = tf.placeholder(tf.float32, shape=[None, 1])

    W = tf.Variable([[0], [0]], dtype=tf.float32)
    b = tf.Variable([0], dtype=tf.float32)
    prediction = tf.matmul(X, W) + b
    loss = tf.losses.mean_squared_error(y_, prediction)
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

    # Feed used whenever W is printed.
    full_feed = {X: data, y_: label}

    with tf.Session() as sess:
        sess.run(iterator.initializer)
        sess.run(tf.global_variables_initializer())
        print(sess.run(W, feed_dict=full_feed))

        for _ in range(2):
            features_np, targets_np = sess.run(next_batch)
            sess.run(train_step, feed_dict={X: features_np, y_: targets_np})
            print(sess.run(W, feed_dict=full_feed))

        for _ in range(2):
            print(sess.run(next_batch))
Esempio n. 20
0
    def __create_internal_dataset(self, load_all_data: bool):
        """Create the train, test and validation datasets.

        The samples are cut into three consecutive index ranges whose sizes
        follow ``self.split_fraction``; the resulting batched datasets are
        stored as the train (0), test (1) and validation (2) datasets.

        Args:
            load_all_data: When true, every image is loaded up front with
                the image-loading parser and the dataset is built from the
                loaded arrays. Otherwise the dataset is built from the file
                names and images are parsed lazily through ``map``.
        """
        split_names = {0: "TRAIN", 1: "TEST", 2: "VALIDAT."}
        cumulative_fraction = 0.0
        for dataset_id in range(3):
            fraction = self.split_fraction[dataset_id]
            min_index = int(np.floor(cumulative_fraction * self.data_size))
            max_index = int(
                np.floor((cumulative_fraction + fraction) * self.data_size))
            cumulative_fraction += fraction

            if load_all_data:
                split_name = split_names[dataset_id]
                # range(min_index, max_index) holds exactly this many items;
                # the former extra "- 1" under-reported the count by one.
                num_images = max_index - min_index
                print("Loading {} images for {} dataset.".format(
                    num_images, split_name))

                images = []
                labels = []
                for image_num, image_index in enumerate(
                        range(min_index, max_index), start=1):
                    im, l = self.__parse_image_load(
                        self.__image_file_names[image_index],
                        self.__labels[image_index])
                    images.append(im)
                    labels.append(l)
                    if image_num % 100 == 0:
                        print("Loaded {} images of {}".format(
                            image_num, num_images))
                print("Loaded all {} images".format(split_name))

                images = np.array(images)
                if not self.rgb:
                    # Non-RGB images get an explicit trailing axis.
                    images = images[..., np.newaxis]
                print("Images shape: {}".format(images.shape))
                images = convert_to_tensor(images, dtypes.float32)
                labels = convert_to_tensor(labels, dtypes.int32)
            else:
                images = convert_to_tensor(
                    self.__image_file_names[min_index:max_index],
                    dtypes.string)
                labels = convert_to_tensor(self.__labels[min_index:max_index],
                                           dtypes.int32)

            data = Dataset.from_tensor_slices((images, labels))
            if not load_all_data:
                data = data.map(self.__parse_image)

            # Create a new dataset with batches of images
            data = data.batch(self.batch_size)
            if dataset_id == 0:
                self.__train_dataset = data
            elif dataset_id == 1:
                self.__test_dataset = data
            else:
                self.__validation_dataset = data
Esempio n. 21
0
    def testMultipleIteratorsOnADatasetThatUsesFunctions(self):
        """Two iterators over one mapped dataset yield the same squares."""
        squared = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])
        squared = squared.map(math_ops.square)

        def drain():
            # A fresh iterator is created for every pass over the dataset.
            return [value.numpy() for value in datasets.Iterator(squared)]

        first_pass = drain()
        self.assertAllEqual([1, 4, 9, 16, 25, 36], first_pass)
        second_pass = drain()
        self.assertAllEqual(first_pass, second_pass)
Esempio n. 22
0
def batch_training(X, Y, M, batch_size, n_epochs):
    """Batch training queue convenience function.

    Slices ``X``, ``Y`` and ``M`` together, shuffles with the module seed
    ``RSEED``, repeats for ``n_epochs`` and batches by ``batch_size``.

    Returns:
        The ``(X, Y, M)`` tensors of the next batch.
    """
    pipeline = Dataset.from_tensor_slices({'X': X, 'Y': Y, 'M': M})
    pipeline = pipeline.shuffle(buffer_size=1000, seed=RSEED)
    pipeline = pipeline.repeat(n_epochs)
    pipeline = pipeline.batch(batch_size)

    next_batch = pipeline.make_one_shot_iterator().get_next()
    return next_batch['X'], next_batch['Y'], next_batch['M']
Esempio n. 23
0
    def dataset_synthetic(self):
        """Create the dummy dataset behind synthetic-data generation.

        This does not actually generate a synthetic dataset; it creates a
        dummy dataset that will result in the generation of synthetic data.
        There is one element per class: an all-zero sequence, a length
        equal to ``max_seq_len`` and the class index as label.
        """
        hp = self.hparams
        num_classes = hp.num_classes

        zero_seqs = tf.zeros(shape=[num_classes, hp.max_seq_len],
                             dtype=tf.int32)
        full_lens = tf.ones(shape=[num_classes, 1],
                            dtype=tf.int32) * hp.max_seq_len
        class_ids = tf.constant(np.arange(num_classes), dtype=tf.int32)
        class_ids = tf.reshape(class_ids, [num_classes, 1])

        components = tuple(
            Dataset.from_tensor_slices(tensor)
            for tensor in (zero_seqs, full_lens, class_ids))
        return self.repeat_and_shuffle(Dataset.zip(components))
Esempio n. 24
0
def dataset_to_inputs(data, labels, batch_size):
    """Returns tuple (input_tf_node, labels_tf_node, iterator).

    The inputs are sliced together, shuffled with a buffer of 10000 and
    batched. The iterator is initializable, so the caller has to run its
    initializer before fetching the nodes.
    """
    pipeline = (Dataset.from_tensor_slices({'x': data, 'y': labels})
                .shuffle(buffer_size=10000)
                .batch(batch_size))
    iterator = pipeline.make_initializable_iterator()
    next_sample = iterator.get_next()
    return next_sample['x'], next_sample['y'], iterator
        def encode(samples, n_repeat):
            """Turn (question, answer) samples into a one-shot iterator.

            Every character of a question or answer is converted with
            ``int``; the int32 arrays are shuffled, repeated ``n_repeat``
            times and batched by ``self.batch_size``.
            """
            def parse(x):
                return [int(symbol) for symbol in x]

            encoded = [(parse(q), parse(a)) for q, a in samples]
            questions, answers = zip(*encoded)
            question_arr = np.array(questions, np.int32)
            answer_arr = np.array(answers, np.int32)

            pipeline = Dataset.from_tensor_slices((question_arr, answer_arr))
            pipeline = pipeline.shuffle(self.batch_size * 10)
            pipeline = pipeline.repeat(n_repeat)
            pipeline = pipeline.batch(self.batch_size)
            return pipeline.make_one_shot_iterator()
Esempio n. 26
0
 def _get_feed(self, attrname, epochs=1):
     """Return the string handle of a one-shot iterator over a stored split.

     Args:
         attrname: Name of the attribute holding the samples; it must
             transpose (``zip(*data)``) into exactly two string sequences.
         epochs: Number of passes over the batched data.

     Returns:
         The ``string_handle()`` tensor of the iterator.
     """
     data = getattr(self, attrname)
     i, d = [tf.convert_to_tensor(x, tf.string) for x in zip(*data)]
     tfdataset = TFDataset.from_tensor_slices((i, d))
     # Shuffle across the whole sample list. `len(data[0])` was the width
     # of a single sample, which gave a buffer too small to mix the data.
     # Only strings are buffered here, since parsing happens afterwards.
     tfdataset = tfdataset.shuffle(buffer_size=len(data))
     tfdataset = tfdataset.map(self._parse_images, num_threads=self.workers,
                               output_buffer_size=1000)
     tfdataset = tfdataset.batch(self.batchsize)
     tfdataset = tfdataset.repeat(epochs)
     iterator = tfdataset.make_one_shot_iterator()
     return iterator.string_handle()
Esempio n. 27
0
def get_dataset(features,
                targets=None,
                shuffle=True,
                n_epochs=1,
                batch_size=None):
    """Wrap features (and optionally targets) as a batched float32 Dataset.

    Args:
        features: Feature values; ``features.shape[0]`` is the batch size
            when ``batch_size`` is ``None``.
        targets: Optional target values, paired with the features.
        shuffle: Shuffle with a buffer of 64 and a fixed seed of 27.
        n_epochs: The data is repeated only when this is greater than one.
        batch_size: Batch size, or ``None`` for a single full-size batch.

    Returns:
        The batched dataset.
    """
    feature_tensor = tf.constant(features, tf.float32)
    if targets is None:
        source = feature_tensor
    else:
        source = (feature_tensor, tf.constant(targets, tf.float32))
    dataset = Dataset.from_tensor_slices(source)

    if shuffle:
        dataset = dataset.shuffle(buffer_size=64, seed=27)
    if n_epochs > 1:
        dataset = dataset.repeat(n_epochs)

    effective_batch = features.shape[0] if batch_size is None else batch_size
    return dataset.batch(effective_batch)
Esempio n. 28
0
def create_dataset(batch_size):
    """Build a batched dataset of parsed files with one-hot labels.

    File names and labels come from ``list_files_and_labels``; labels are
    one-hot encoded with depth 10. Every (file, label) pair is wrapped in
    its own single-element dataset, parsed, and interleaved with a cycle
    length of 10.
    """
    files, labels = list_files_and_labels()
    files_const = tf.constant(files)
    labels_const = tf.one_hot(tf.constant(labels), depth=10)

    def parse_single(filename, label):
        single = Dataset.from_tensors((filename, label))
        return single.map(_parse_function, num_threads=1)

    dataset = Dataset.from_tensor_slices((files_const, labels_const))
    dataset = dataset.interleave(parse_single, cycle_length=10)
    # Shuffling (buffer_size=10000) is intentionally left disabled here.
    return dataset.batch(batch_size)
Esempio n. 29
0
    def sub_input_fn():
        """Return ``({"x": features}, labels)`` tensors for an estimator."""
        raw_dataset = Dataset.from_tensor_slices((imgs, labels))
        # The preprocessor puts the data into its final form
        # (batching, shuffling, etc.).
        prepared = preprocessor(raw_dataset, batch_size, dataset_length,
                                is_training)

        next_element = prepared.make_one_shot_iterator().get_next()

        # Features go in a dict so the premade estimators can use them.
        features = {"x": next_element[0]}
        return features, next_element[1]
Esempio n. 30
0
    def __init__(self,
                 txt_file,
                 mode,
                 batch_size,
                 num_classes,
                 shuffle=True,
                 buffer_size=1000):
        """Create a new ImageDataGenerator.

        Args:
            txt_file: Path of the text file read by ``_read_txt_file``.
            mode: Either ``"training"`` or ``"inference"``; selects the
                parse function mapped over the dataset.
            batch_size: Number of images per batch.
            num_classes: Number of classes in the dataset.
            shuffle: Whether or not to shuffle the data in the dataset and
                the initial file list.
            buffer_size: Buffer size of the dataset-level shuffle.

        Raises:
            ValueError: If an invalid mode is passed.
        """
        self.txt_file = txt_file
        self.num_classes = num_classes

        self._read_txt_file()
        # Number of samples in the dataset
        self.data_size = len(self.labels)

        # Initial shuffling of the file and label lists
        if shuffle:
            self._shuffle_lists()

        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

        # Create dataset
        data = Dataset.from_tensor_slices((self.img_paths, self.labels))

        if mode == "training":
            parse_fn = self._parse_function_train
        elif mode == "inference":
            parse_fn = self._parse_function_inference
        else:
            # The rejected argument is the mode, so the message names it.
            raise ValueError("Invalid mode '%s'." % (mode))

        data = data.map(parse_fn,
                        num_threads=8,
                        output_buffer_size=100 * batch_size)

        if shuffle:
            data = data.shuffle(buffer_size=buffer_size)

        data = data.batch(batch_size)
        self.data = data
Esempio n. 31
0
    def input_fn():
        """Build a dataset of decoded 224x224 float images.

        Returns:
            A pair ``(image_dataset, None)``; no label dataset is produced.
        """
        with tf.variable_scope("input_fn"), tf.device("/cpu:0"):

            def decode_image(filename):
                # Read, decode as 3-channel JPEG, resize, cast to float.
                raw_bytes = tf.read_file(filename)
                decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
                resized = tf.image.resize_images(decoded, [224, 224])
                return tf.to_float(resized)

            name_dataset = Dataset.from_tensor_slices(list(filenames))
            image_dataset = name_dataset.map(decode_image)
        return image_dataset, None
def main(argv=None):
    '''Train a convolutional VAE on MNIST and save latent-space plots.

    Training images are served by a TensorFlow ``Dataset`` one-shot iterator
    whose output tensor is used directly as the Keras model input. The model
    is wrapped with ``make_parallel`` for the GPUs returned by
    ``get_available_gpus``. After training, the shared layers are reused to
    report a validation loss and to write two figures: ``vae_scatter.ps``
    (encoded test digits) and ``vae_digit.ps`` (a 15x15 grid of decoded
    digits).

    Note: at runtime this docstring is replaced by the module docstring,
    which is then handed to ``parser_``.

    Args:
        argv: Optional list of extra command-line arguments. When given, its
            items are appended to ``sys.argv`` in place.
    '''
    main.__doc__ = __doc__
    # list.extend() returns None, so the result must not be bound to a name;
    # only the in-place extension of sys.argv matters here.
    if argv is not None:
        sys.argv.extend(argv)
    desc = main.__doc__  # .format(os.path.basename(__file__))
    # CLI parser
    args = parser_(desc)

    mgpu = 1 if getattr(args, 'mgpu', None) is None else args.mgpu

    # input image dimensions
    img_rows, img_cols, img_chns = 28, 28, 1
    # number of convolutional filters to use
    filters = 64
    # convolution kernel size
    num_conv = 3

    gpus_list = get_available_gpus(mgpu)
    ngpus = len(gpus_list)

    # Global batch size: 128 samples per GPU.
    batch_size = 128 * ngpus
    if K.image_data_format() == 'channels_first':
        original_img_size = (img_chns, img_rows, img_cols)
    else:
        original_img_size = (img_rows, img_cols, img_chns)
    latent_dim = 2
    intermediate_dim = 128
    epsilon_std = 1.0
    epochs = args.epochs  # 5

    # train the VAE on MNIST digits
    (x_train, _), (x_test, y_test) = mnist.load_data()

    # Scale pixels to [0, 1] and reshape to the backend's image layout.
    x_train = x_train.astype('float32') / 255.
    x_train = x_train.reshape((x_train.shape[0],) + original_img_size)
    x_test = x_test.astype('float32') / 255.
    x_test = x_test.reshape((x_test.shape[0],) + original_img_size)

    print('x_train.shape:', x_train.shape)

    train_samples = x_train.shape[0]
    # Number of batches needed to cover the training set once (ceiling, so a
    # final partial batch still counts as a step).
    steps_per_epoch = int(np.ceil(float(train_samples) / batch_size))

    # Create the dataset and its associated one-shot iterator.
    buffer_size = 10000
    dataset = Dataset.from_tensor_slices(x_train)
    dataset = dataset.repeat()
    dataset = dataset.shuffle(buffer_size)
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    x_train_batch = iterator.get_next()

    ldict = make_shared_layers_dict(
        img_chns, img_rows, img_cols, batch_size, filters,
        num_conv, intermediate_dim, latent_dim, epsilon_std)
    # ldict is a dictionary that holds all layers. Since these layers are
    # instantiated once, they are shared amongs vae, encoder, and generator.

    # The model input is the iterator tensor, so fit() needs no x/y arrays.
    x = Input(tensor=x_train_batch)
    vae_serial = make_vae(ldict, x)
    # :  :type vae: Model
    vae = make_parallel(vae_serial, gpus_list)

    # Learning rate is scaled with the number of GPUs.
    lr = 0.001 * ngpus
    opt = RMSprop(lr)  # 'rmsprop'
    # opt = tf.train.RMSPropOptimizer(lr)
    # opt = TFOptimizer(opt)
    vae.compile(optimizer=opt, loss=None)
    # vae.summary()
    print_mgpu_modelsummary(vae)

    callbacks = [BatchTiming(), SamplesPerSec(batch_size)]

    # Fit the model using data from the TF data tensors.
    vae.fit(steps_per_epoch=steps_per_epoch, epochs=epochs,
            callbacks=callbacks)

    # Rebuild the VAE on a regular placeholder input (same shared layers) so
    # it can be evaluated on the numpy test set.
    x = Input(shape=original_img_size)
    vae_val = make_vae(ldict, x)
    vae_val.compile(optimizer=opt, loss=None)
    # batch_size // ngpus is the per-GPU batch size (128).
    loss = vae_val.evaluate(x=x_test, y=None, batch_size=batch_size // ngpus)
    print('\n\nVAE VALIDATION LOSS: {}'.format(loss))

    x = Input(shape=original_img_size)
    z_mean, _ = get_encoded(ldict, x)
    encoder = Model(x, z_mean)
    # :  :type encoder: Model

    decoder_input = Input(shape=(latent_dim,))
    x_decoded_mean_squash = get_decoded(ldict, decoder_input)
    generator = Model(decoder_input, x_decoded_mean_squash)
    # :  :type generator: Model

    # display a 2D plot of the digit classes in the latent space
    x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
    plt.figure(figsize=(6, 6))
    plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
    plt.colorbar()
    # plt.show()
    plt.savefig('vae_scatter.ps')
    plt.close()

    # display a 2D manifold of the digits
    n = 15  # figure with 15x15 digits
    digit_size = 28
    figure = np.zeros((digit_size * n, digit_size * n))
    # Linearly spaced coordinates on the unit square were transformed through
    # the inverse CDF (ppf) of the Gaussian
    # To produce values of the latent variables z, since the prior of the
    # latent space is Gaussian
    grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
    grid_y = norm.ppf(np.linspace(0.05, 0.95, n))

    for i, yi in enumerate(grid_x):
        for j, xi in enumerate(grid_y):
            z_sample = np.array([[xi, yi]])
            # Replicate the single latent point to a full batch; only the
            # first decoded image is kept below.
            # NOTE(review): presumably required because the shared layers
            # were built with a fixed batch_size -- confirm.
            z_sample = np.tile(z_sample, batch_size).reshape(batch_size, 2)
            x_decoded = generator.predict(z_sample, batch_size=batch_size)
            digit = x_decoded[0].reshape(digit_size, digit_size)
            figure[i * digit_size: (i + 1) * digit_size,
                   j * digit_size: (j + 1) * digit_size] = digit

    plt.figure(figsize=(10, 10))
    plt.imshow(figure, cmap='Greys_r')
    # plt.show()
    plt.savefig('vae_digit.ps')
    plt.close()
    def __init__(self, txt_file, mode, batch_size, num_classes, shuffle=True,
                 buffer_size=1000):
        """Build a batched TensorFlow dataset from an image list file.

        The text file is expected to contain one sample per line: a path to
        an image followed, separated by a space, by an integer class number.
        The resulting dataset is stored in `self.data`.

        Args:
            txt_file: Path to the text file.
            mode: Either 'training' or 'inference'. Selects which parsing
                function is mapped over the dataset.
            batch_size: Number of images per batch.
            num_classes: Number of classes in the dataset.
            shuffle: Whether to shuffle both the initial file/label lists and
                the dataset itself.
            buffer_size: Buffer size passed to the dataset's `shuffle`.

        Raises:
            ValueError: If `mode` is neither 'training' nor 'inference'.

        """
        self.txt_file = txt_file
        self.num_classes = num_classes

        # Load the samples; this is expected to set self.img_paths and
        # self.labels, which are read below.
        self._read_txt_file()
        self.data_size = len(self.labels)

        # Shuffle paths and labels once up front.
        if shuffle:
            self._shuffle_lists()

        # Turn the Python lists into tensors and slice them into a dataset
        # of (path, label) pairs.
        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)
        dataset = Dataset.from_tensor_slices((self.img_paths, self.labels))

        # Pick the parsing function for the requested mode.
        if mode == 'training':
            parse_fn = self._parse_function_train
        elif mode == 'inference':
            parse_fn = self._parse_function_inference
        else:
            raise ValueError("Invalid mode '%s'." % (mode))

        dataset = dataset.map(parse_fn, num_threads=8,
                              output_buffer_size=100 * batch_size)

        # Shuffle the dataset using a buffer of `buffer_size` elements.
        if shuffle:
            dataset = dataset.shuffle(buffer_size=buffer_size)

        # Group consecutive elements into batches.
        self.data = dataset.batch(batch_size)
Esempio n. 34
0
import numpy as np
import tensorflow as tf
from tensorflow.contrib.data import Dataset


# Synthetic regression data: y = x^2 plus Gaussian noise.
npx = np.random.uniform(-1, 1, (1000, 1))                           # x data
npy = np.power(npx, 2) + np.random.normal(0, 0.1, size=npx.shape)   # y data
# First 800 rows are used for training, the remaining 200 for testing.
npx_train, npx_test = np.split(npx, [800])
npy_train, npy_test = np.split(npy, [800])

# Placeholders let different arrays be fed into the same dataset later on.
tfx = tf.placeholder(npx_train.dtype, npx_train.shape)
tfy = tf.placeholder(npy_train.dtype, npy_train.shape)

# Input pipeline: slice the fed arrays, shuffle, batch, repeat.
dataset = Dataset.from_tensor_slices((tfx, tfy))
dataset = dataset.shuffle(buffer_size=1000)   # choose data randomly from this buffer
dataset = dataset.batch(32)                   # batch size you will use
dataset = dataset.repeat(3)                   # repeat for 3 epochs
# The dataset depends on placeholders, so the iterator has to be initialized
# explicitly with a feed_dict before use.
iterator = dataset.make_initializable_iterator()

# Network: one hidden ReLU layer of 10 units, linear output, MSE loss.
bx, by = iterator.get_next()                  # use batch to update
l1 = tf.layers.dense(bx, 10, tf.nn.relu)
out = tf.layers.dense(l1, npy.shape[1])
loss = tf.losses.mean_squared_error(by, out)
train = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

sess = tf.Session()
# Initialize the iterator (feeding the training arrays) and the variables.
sess.run([iterator.initializer, tf.global_variables_initializer()], feed_dict={tfx: npx_train, tfy: npy_train})
Esempio n. 35
0
    return predictions


# Training hyper-parameters.
batch_size = 128
buffer_size = 10000
# Batches per epoch for a 60000-sample training set, rounded up.
steps_per_epoch = int(np.ceil(60000 / float(batch_size)))  # = 469
epochs = 5
num_classes = 10

(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Scale pixels to [0, 1] and append a trailing channel axis.
x_train = x_train.astype(np.float32) / 255
x_train = np.expand_dims(x_train, -1)
# One-hot encode the labels as a TensorFlow tensor.
y_train = tf.one_hot(y_train, num_classes)

# Create the dataset and its associated one-shot iterator.
dataset = Dataset.from_tensor_slices((x_train, y_train))
dataset = dataset.repeat()
dataset = dataset.shuffle(buffer_size)
dataset = dataset.batch(batch_size)
iterator = dataset.make_one_shot_iterator()

# Model creation using tensors from the get_next() graph node.
inputs, targets = iterator.get_next()
model_input = layers.Input(tensor=inputs)
model_output = cnn_layers(model_input)
train_model = keras.models.Model(inputs=model_input, outputs=model_output)

# The labels come from the iterator as well, so they are passed to compile()
# as target tensors.
train_model.compile(optimizer=keras.optimizers.RMSprop(lr=2e-3, decay=1e-5),
                    loss='categorical_crossentropy',
                    metrics=['accuracy'],
                    target_tensors=[targets])
def main(argv=None):
    '''Train a convolutional VAE on MNIST with Horovod and save plots.

    Each process pins itself to a GPU derived from its Horovod local rank,
    trains the VAE from a TensorFlow ``Dataset`` one-shot iterator wired in
    as the Keras input tensor, and wraps the optimizer with
    ``hvd.DistributedOptimizer``. Rank 0 additionally prints the model
    summary, reports a validation loss and writes ``vae_scatter.ps`` and
    ``vae_digit.ps``.

    Note: at runtime this docstring is replaced by the module docstring,
    which is then handed to ``parser_``.

    Args:
        argv: Optional list of extra command-line arguments. When given, its
            items are appended to ``sys.argv`` in place.
    '''
    main.__doc__ = __doc__
    # list.extend() returns None, so the result must not be bound to a name;
    # only the in-place extension of sys.argv matters here.
    if argv is not None:
        sys.argv.extend(argv)
    desc = main.__doc__  # .format(os.path.basename(__file__))
    # CLI parser
    args = parser_(desc)

    # Several ranks may share one GPU: integer-divide the local rank by the
    # number of ranks per GPU to get the device index.
    nranks_per_gpu = args.nranks_per_gpu
    local_rank = hvd.local_rank()
    gpu_local_rank = local_rank // nranks_per_gpu
    print('local_rank, GPU_LOCAL_RANK: {}, {}'.format(
        local_rank, gpu_local_rank))

    # Pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.gpu_options.visible_device_list = str(hvd.local_rank())
    config.gpu_options.visible_device_list = str(gpu_local_rank)
    K.set_session(tf.Session(config=config))

    # input image dimensions
    img_rows, img_cols, img_chns = 28, 28, 1
    # number of convolutional filters to use
    filters = 64
    # convolution kernel size
    num_conv = 3

    hvdsize = hvd.size()

    batch_size = 128  # 100
    if K.image_data_format() == 'channels_first':
        original_img_size = (img_chns, img_rows, img_cols)
    else:
        original_img_size = (img_rows, img_cols, img_chns)
    latent_dim = 2
    intermediate_dim = 128
    epsilon_std = 1.0
    epochs = args.epochs  # 5

    # train the VAE on MNIST digits
    (x_train, _), (x_test, y_test) = mnist.load_data()

    # Scale pixels to [0, 1] and reshape to the backend's image layout.
    x_train = x_train.astype('float32') / 255.
    x_train = x_train.reshape((x_train.shape[0],) + original_img_size)
    x_test = x_test.astype('float32') / 255.
    x_test = x_test.reshape((x_test.shape[0],) + original_img_size)

    if hvd.rank() == 0:
        print('x_train.shape:', x_train.shape)

    train_samples = x_train.shape[0]
    # steps_per_epoch = train_samples // batch_size // hvdsize
    speedupopt = args.speedup
    if speedupopt == SpeedupOpts.imgspersec:
        # Every rank runs a full epoch's worth of steps.
        steps_per_epoch = train_samples // batch_size
    else:
        # Split the epoch across ranks (ceiling, so a final partial batch
        # still counts as a step).
        steps_per_epoch = int(np.ceil(
            float(train_samples) / batch_size / hvdsize))

    # Create the dataset and its associated one-shot iterator.
    buffer_size = 10000
    dataset = Dataset.from_tensor_slices(x_train)
    dataset = dataset.repeat()
    dataset = dataset.shuffle(buffer_size)
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    x_train_batch = iterator.get_next()

    ldict = make_shared_layers_dict(
        img_chns, img_rows, img_cols, batch_size, filters,
        num_conv, intermediate_dim, latent_dim, epsilon_std)
    # ldict is a dictionary that holds all layers. Since these layers are
    # instantiated once, they are shared amongs vae, encoder, and generator.

    # The model input is the iterator tensor, so fit() needs no x/y arrays.
    x = Input(tensor=x_train_batch)
    vae = make_vae(ldict, x)
    # :  :type vae: Model

    lr = 0.001  # * hvdsize
    opt = tf.train.RMSPropOptimizer(lr)
    # Add Horovod Distributed Optimizer.
    opt = hvd.DistributedOptimizer(opt)  # , use_locking=True)
    opt = TFOptimizer(opt)

    # opt = RMSprop(lr)
    # Add Horovod Distributed Optimizer.
    # opt = hvd_keras.DistributedOptimizer(opt)  # , use_locking=True)

    vae.compile(optimizer=opt, loss=None)
    if hvd.rank() == 0:
        vae.summary()

    # Timing callbacks are only attached on rank 0.
    callbacks = []
    if hvd.rank() == 0:
        callbacks += [BatchTiming(), SamplesPerSec(batch_size * hvdsize)]

    # Broadcast the global variables from rank 0 before training starts.
    sess = K.get_session()
    sess.run(hvd.broadcast_global_variables(0))

    # Fit the model using data from the TF data tensors.
    vae.fit(steps_per_epoch=steps_per_epoch, epochs=epochs,
            callbacks=callbacks)

    # Evaluation and plotting happen on rank 0 only.
    if hvd.rank() == 0:
        # Rebuild the VAE on a regular placeholder input (same shared layers)
        # so it can be evaluated on the numpy test set.
        x = Input(shape=original_img_size)
        vae_val = make_vae(ldict, x)
        vae_val.compile(optimizer=opt, loss=None)
        loss = vae_val.evaluate(x=x_test, y=None, batch_size=batch_size)
        print('\n\nVAE VALIDATION LOSS: {}'.format(loss))

        x = Input(shape=original_img_size)
        z_mean, _ = get_encoded(ldict, x)
        encoder = Model(x, z_mean)
        # :  :type encoder: Model

        decoder_input = Input(shape=(latent_dim,))
        x_decoded_mean_squash = get_decoded(ldict, decoder_input)
        generator = Model(decoder_input, x_decoded_mean_squash)
        # :  :type generator: Model

        # display a 2D plot of the digit classes in the latent space
        x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
        plt.figure(figsize=(6, 6))
        plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
        plt.colorbar()
        # plt.show()
        plt.savefig('vae_scatter.ps')
        plt.close()

        # display a 2D manifold of the digits
        n = 15  # figure with 15x15 digits
        digit_size = 28
        figure = np.zeros((digit_size * n, digit_size * n))
        # Linearly spaced coordinates on the unit square were transformed
        # through the inverse CDF (ppf) of the Gaussian
        # To produce values of the latent variables z, since the prior of the
        # latent space is Gaussian
        grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
        grid_y = norm.ppf(np.linspace(0.05, 0.95, n))

        for i, yi in enumerate(grid_x):
            for j, xi in enumerate(grid_y):
                z_sample = np.array([[xi, yi]])
                # Replicate the single latent point to a full batch; only the
                # first decoded image is kept below.
                # NOTE(review): presumably required because the shared layers
                # were built with a fixed batch_size -- confirm.
                z_sample = np.tile(z_sample, batch_size).reshape(batch_size, 2)
                x_decoded = generator.predict(z_sample, batch_size=batch_size)
                digit = x_decoded[0].reshape(digit_size, digit_size)
                figure[i * digit_size: (i + 1) * digit_size,
                       j * digit_size: (j + 1) * digit_size] = digit

        plt.figure(figsize=(10, 10))
        plt.imshow(figure, cmap='Greys_r')
        # plt.show()
        plt.savefig('vae_digit.ps')
        plt.close()

    K.clear_session()