def build_validation_spec(self, hparams: Dict[str, Any]) -> tf.estimator.EvalSpec:
    """Construct the EvalSpec that feeds validation records to the estimator.

    Args:
        hparams: Hyperparameter dictionary forwarded to the input function.

    Returns:
        A `tf.estimator.EvalSpec` reading files from the "validation" subdirectory
        of the downloaded data directory.
    """
    data_root = get_download_data_dir()
    validation_files = self._get_filenames(os.path.join(data_root, "validation"))
    # Validation makes a single ordered pass over the data — no shuffling/repeating.
    input_fn = self._input_fn(hparams, validation_files, shuffle_and_repeat=False)
    return tf.estimator.EvalSpec(input_fn)
def build_train_spec(self, hparams: Dict[str, Any]) -> tf.estimator.TrainSpec:
    """Construct the TrainSpec that feeds training records to the estimator.

    Args:
        hparams: Hyperparameter dictionary forwarded to the input function.

    Returns:
        A `tf.estimator.TrainSpec` reading files from the "train" subdirectory
        of the downloaded data directory.
    """
    data_root = get_download_data_dir()
    train_files = self._get_filenames(os.path.join(data_root, "train"))
    # Training shuffles and repeats so the estimator can run for many steps.
    input_fn = self._input_fn(hparams, train_files, shuffle_and_repeat=True)
    return tf.estimator.TrainSpec(input_fn)
def make_data_loaders(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[DataLoader, DataLoader]:
    """Build the (training, validation) DataLoaders over the downloaded dataset.

    Args:
        experiment_config: Experiment configuration (unused here).
        hparams: Hyperparameters; "batch_size" selects the loader batch size.

    Returns:
        A (train_loader, validation_loader) pair.
    """
    data_dir = get_download_data_dir()
    bsz = hparams["batch_size"]
    # get_data(True, ...) yields the training split, get_data(False, ...) the
    # validation split — presumably a train-flag convention; verify in get_data.
    train_loader = DataLoader(get_data(True, data_dir), batch_size=bsz)
    validation_loader = DataLoader(get_data(False, data_dir), batch_size=bsz)
    return train_loader, validation_loader
def make_data_loaders(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[Optional[tp.DataFlow], Optional[tp.DataFlow]]:
    """Provides training and validation data for model training.

    Args:
        experiment_config: Experiment configuration (unused here).
        hparams: Hyperparameters; "batch_size" selects the dataflow batch size.

    Returns:
        Batched tensorpack dataflows over the MNIST "train" and "test" splits.
    """
    data_dir = get_download_data_dir()
    batch_size = hparams["batch_size"]
    train_flow = tp.BatchData(tp.dataset.Mnist("train", dir=data_dir), batch_size)
    # The MNIST "test" split serves as the validation set here.
    val_flow = tp.BatchData(tp.dataset.Mnist("test", dir=data_dir), batch_size)
    return train_flow, val_flow
def make_data_loaders(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[DataLoader, DataLoader]:
    """Wrap the train/validation datasets in DataLoaders.

    Args:
        experiment_config: Experiment configuration (unused here).
        hparams: Hyperparameters; "batch_size" selects the loader batch size.

    Returns:
        A (train_loader, validation_loader) pair.
    """
    data_dir = get_download_data_dir()
    bsz = hparams["batch_size"]
    # get_dataset's boolean flag picks the split — True for train, False for
    # validation; presumably a train-flag convention, verify in get_dataset.
    train_loader = DataLoader(get_dataset(data_dir, True), batch_size=bsz)
    validation_loader = DataLoader(get_dataset(data_dir, False), batch_size=bsz)
    return train_loader, validation_loader
def create_cifar10_sequence(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[Sequence, Sequence]:
    """Build (train, test) Keras Sequences for CIFAR-10.

    Configuration of note in the YAML experiment file:

    * ``data.acceleration`` — when present, the training sequence is wrapped in
      a ``KerasDataAdapter``. Set ``use_multiprocessing`` to ``True`` for
      multiprocessing or ``False`` for multithreading, and ``workers`` to the
      number of processes/threads.
    * The ``width_shift_range``, ``height_shift_range``, and
      ``horizontal_flip`` hyperparameters are forwarded to Keras'
      ``ImageDataGenerator`` for real-time data augmentation.
    """
    batch_size = hparams["batch_size"]
    download_dir = get_download_data_dir()
    (train_data, train_labels), (test_data, test_labels) = get_data(download_dir)

    # Real-time augmentation settings passed through to ImageDataGenerator.
    augmentation = {
        "width_shift_range": hparams.get("width_shift_range", 0.0),
        "height_shift_range": hparams.get("height_shift_range", 0.0),
        "horizontal_flip": hparams.get("horizontal_flip", False),
    }
    train = augment_data(train_data, train_labels, batch_size, augmentation)

    acceleration = experiment_config["data"].get("acceleration")
    if acceleration:
        # Optional multi-worker loading for the training side only.
        train = KerasDataAdapter(
            train,
            workers=acceleration.get("workers", 1),
            use_multiprocessing=acceleration.get("use_multiprocessing", False),
        )

    # Validation side: preprocessed data served from memory, no augmentation.
    test = keras.data.InMemorySequence(
        data=preprocess_data(test_data),
        labels=preprocess_labels(test_labels),
        batch_size=batch_size,
    )
    return train, test
def create_cifar10_tf_dataset(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[tf.data.Dataset, tf.data.Dataset]:
    """Build batched (train, test) tf.data pipelines for CIFAR-10 via TFDS.

    Args:
        experiment_config: Experiment configuration (unused here).
        hparams: Hyperparameters; "batch_size" selects the batch size.

    Returns:
        Batched train and test datasets of (image, one-hot label) pairs.
    """
    download_dir = get_download_data_dir()
    builder = tfds.image.Cifar10(data_dir=download_dir)
    builder.download_and_prepare(download_dir=download_dir)
    splits = builder.as_dataset()

    def to_example(record):
        # Scale pixels to [0, 1] and one-hot encode the class label.
        return tf.divide(record["image"], 255), tf.one_hot(record["label"], NUM_CLASSES)

    train = wrap_dataset(splits["train"]).map(to_example)
    test = wrap_dataset(splits["test"]).map(to_example)

    batch_size = hparams["batch_size"]
    return train.batch(batch_size), test.batch(batch_size)