def prepare(self, ds: Dataset, ds_len: int, shuffle: bool = False, augment: bool = False) -> Dataset:
    """Assemble the input pipeline for one dataset split.

    Steps: resize/rescale every element, cache, optionally shuffle,
    batch, optionally augment (training only), then prefetch.
    Elements may be bare images or (image, label) pairs.
    """
    # TODO: move the general calls such as shuffle, batch, prefetch etc. to Parent class
    print(f"Preparing dataset of {ds_len} elements...")

    def _rescale(x, y=None):
        # Default arg lets the same fn serve labelled and unlabelled datasets.
        if y is None:
            return self._resize_and_rescale(x)
        return self._resize_and_rescale(x), y

    def _augment(x, y=None):
        if y is None:
            return self._data_augmentation(x, training=True)
        return self._data_augmentation(x, training=True), y

    # Resize and rescale all datasets, then cache the decoded elements.
    ds = ds.map(_rescale, num_parallel_calls=self._workers)
    ds = ds.cache()

    if shuffle:
        ds = ds.shuffle(buffer_size=1000)  # ds_len

    # Batch all datasets.
    ds = ds.batch(self._batch_size)

    # Data augmentation is applied only to the training set (after batching).
    if augment:
        ds = ds.map(_augment, num_parallel_calls=self._workers)

    # Buffered prefetching overlaps preprocessing with model execution.
    return ds.prefetch(buffer_size=self._workers)
def convert_to(tf_dataset: Dataset, directory: str, name: str):
    """Serialize a dataset of (image, label) pairs into `<directory>/<name>.tfrecords`.

    Iterates the dataset one element at a time (TF1 one-shot iterator) and
    writes each element as a tf.train.Example with `label` and `image_raw`
    features. Creates `directory` if it does not exist.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)

    tf_dataset = tf_dataset.batch(batch_size=1)
    batches = tf_dataset.make_one_shot_iterator().get_next()

    filename = os.path.join(directory, name + ".tfrecords")
    # Fix: the log message had no placeholder, so the target path was never logged.
    tf.logging.info(f"Write tfrecords into {filename}")

    total_samples = 0
    with tf.Session() as sess:
        # Fix: use the writer as a context manager so it is always flushed
        # and closed — the original leaked it (close() was never called).
        with tf.python_io.TFRecordWriter(filename) as writer:
            while True:
                try:
                    image, label = sess.run(batches)
                    example = tf.train.Example(features=tf.train.Features(
                        feature={
                            "label": _int64_feature(label),
                            "image_raw": _bytes_feature(image.tostring()),
                        }))
                    writer.write(example.SerializeToString())
                    total_samples += 1
                except tf.errors.OutOfRangeError:
                    # One-shot iterator exhausted: conversion is complete.
                    tf.logging.info(
                        f"Finished conversion. Total samples: {total_samples}")
                    break
# Example #3 (scrape artifact from a code-example listing; kept as a comment)
def write_to_txt(dataset: Dataset, output_dir: str, name: str):
    """Dump a dataset of (image, label) pairs to `<output_dir>/mnist.<name>.txt.gz`.

    Each line is `label:pixel,pixel,...` with pixel values rendered as
    strings (`<U16`). Creates `output_dir` if it does not exist.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataset = dataset.batch(1)
    iterator = dataset.make_one_shot_iterator()
    batch = iterator.get_next()

    filename = f"{output_dir}/mnist.{name}.txt.gz"
    # Fix: the log message had no placeholder, so the target path was never logged.
    tf.logging.info(f"Write txt into {filename}")
    total_samples = 0
    with gzip.open(filename, "wt") as f:
        with tf.Session() as sess:
            while True:
                try:
                    image, label = sess.run(batch)
                    # Drop the size-1 batch dimension added by .batch(1).
                    label = label[0]
                    image = image[0].astype("<U16")

                    contents = str(label) + ":" + ",".join(list(image))
                    f.write(f"{contents}\n")
                    total_samples += 1
                except tf.errors.OutOfRangeError:
                    # One-shot iterator exhausted: conversion is complete.
                    tf.logging.info(
                        f"Finished conversion. Total samples: {total_samples}")
                    break
 def evaluate(self, test_set: Dataset) -> float:
     """Exact-match memory accuracy over the first 1000 test examples.

     A test feature vector is predicted as the label of the first stored
     row that matches it exactly; unmatched vectors default to class 0.
     """
     features, labels = next(tfds.as_numpy(test_set.batch(1000)))
     ntotal: int = labels.shape[0]
     # Empty memory: every prediction falls back to the default class 0.
     if self.memory_features is None:
         return np.sum(labels == 0) / ntotal
     predictions = np.empty(features.shape[0])
     for i, feat in enumerate(features):
         matched_rows = np.all(self.memory_features == feat, axis=1).nonzero()[0]
         if matched_rows.size == 0:
             predictions[i] = 0     # default prediction
         else:
             predictions[i] = self.memory_labels[matched_rows[0]]
     return np.sum(predictions == labels) / ntotal
    def train(self,
              training_set: Dataset,
              validation_set: Dataset,
              labels: Iterable[int]) -> None:
        """Memorize (up to) 1000 training examples verbatim.

        `validation_set` and `labels` are accepted for interface
        compatibility but unused.
        """
        del validation_set
        del labels

        feats, labs = next(tfds.as_numpy(training_set.batch(1000)))

        if self.memory_features is None:
            # First call: the memory starts out as this batch.
            self.memory_features = feats
            self.memory_labels = labs
        else:
            # Newest examples are stacked in front of the existing memory,
            # so exact-match lookups see them first.
            self.memory_features = np.vstack([feats, self.memory_features])
            self.memory_labels = np.hstack([labs, self.memory_labels])