import tensorflow as tf

# Assumed project-local import; the module path is not shown in this excerpt.
from data_loader import DataLoader


def __init__(self, config):
    """
    Args:
        config: configuration of the current model
    """
    self.config = config
    # Load the data here
    d = DataLoader(self.config)
    # Get the filenames and labels
    self.filenames = d.get_train_dataset()
    # Create the Dataset using the TensorFlow Data API
    self.dataset = tf.data.Dataset.from_tensor_slices(self.filenames)
    # Apply the parse function to get the numpy arrays of the images
    self.dataset = self.dataset.map(
        map_func=self._parse_function,
        num_parallel_calls=self.config.data_loader.num_parallel_calls,
    )
    # Shuffle the dataset
    # self.dataset = self.dataset.shuffle(self.config.buffer_size)
    # Repeat the dataset for the configured number of epochs
    self.dataset = self.dataset.repeat(self.config.data_loader.num_epochs)
    # Prefetch ahead to increase performance: buffer roughly the next
    # 10 batches' worth of elements (prefetch runs before batching here)
    self.dataset = self.dataset.prefetch(
        buffer_size=10 * self.config.data_loader.batch_size)
    # Apply batching
    self.dataset = self.dataset.batch(self.config.data_loader.batch_size)
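# Minimal, self-contained sketch of how a pipeline like the one above is
# typically consumed under TensorFlow 1.x (the API used throughout this
# file). The file paths, parse function, image size, and batch size below
# are placeholders, not values from the real config or DataLoader.
def _demo_pipeline():
    filenames = ["img_0.jpg", "img_1.jpg"]  # hypothetical file paths

    def _demo_parse(filename):
        # Stand-in for self._parse_function: read and decode one image
        image = tf.image.decode_jpeg(tf.read_file(filename), channels=1)
        # Resize to a fixed shape so the examples can be batched
        return tf.image.resize_images(image, [64, 64])

    dataset = (tf.data.Dataset.from_tensor_slices(filenames)
               .map(_demo_parse, num_parallel_calls=2)
               .repeat(1)
               .batch(2)
               .prefetch(buffer_size=1))

    iterator = dataset.make_initializable_iterator()
    next_batch = iterator.get_next()

    with tf.Session() as sess:
        sess.run(iterator.initializer)
        images = sess.run(next_batch)  # numpy array of shape (2, 64, 64, 1)
    return images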
import tensorflow as tf

# Assumed project-local imports; the module paths are not shown in this excerpt.
from data_loader import DataLoader
from logger import Logger


def __init__(self, config):
    """
    Args:
        config: configuration of the current model
    """
    self.config = config
    log_object = Logger(self.config)
    self.logger = log_object.get_logger(__name__)
    # Load the data here
    d = DataLoader(self.config)
    self.logger.info("Data is loading...")
    # Get the filenames and labels
    self.filenames_train = d.get_train_dataset()
    # assert len(self.filenames) == len(self.labels)
    # Create the Dataset using the TensorFlow Data API
    self.dataset = tf.data.Dataset.from_tensor_slices(self.filenames_train)
    # Apply the parse function to get the numpy arrays of the images
    self.dataset = self.dataset.map(
        map_func=self._parse_function,
        num_parallel_calls=self.config.data_loader.num_parallel_calls,
    )
    # Shuffle the dataset. When a validation split is used, shrink the
    # shuffle buffer to the share of the data that stays in training.
    if self.config.data_loader.validation:
        buffer_size = int(
            self.config.data_loader.buffer_size
            * ((100 - self.config.data_loader.validation_percent) / 100))
    else:
        buffer_size = self.config.data_loader.buffer_size
    self.dataset = self.dataset.shuffle(buffer_size)
    # Repeat the dataset indefinitely
    self.dataset = self.dataset.repeat()
    # Apply batching
    self.dataset = self.dataset.batch(self.config.data_loader.batch_size)
    # Prefetch ahead to increase performance (the buffer here counts
    # batches, since batching has already been applied)
    self.dataset = self.dataset.prefetch(
        buffer_size=10 * self.config.data_loader.batch_size)
    self.iterator = self.dataset.make_initializable_iterator()
    self.image = self.iterator.get_next()

    # Validation dataset
    if self.config.data_loader.validation:
        self.filenames_valid = d.get_valid_dataset()
        # Create the Dataset using the TensorFlow Data API
        self.valid_dataset = tf.data.Dataset.from_tensor_slices(
            self.filenames_valid)
        # Apply the parse function to get the numpy arrays of the images
        self.valid_dataset = self.valid_dataset.map(
            map_func=self._parse_function,
            num_parallel_calls=self.config.data_loader.num_parallel_calls,
        )
        # The validation buffer covers the validation share of the data
        buffer_size = int(
            (self.config.data_loader.buffer_size
             * self.config.data_loader.validation_percent) / 100)
        self.valid_dataset = self.valid_dataset.shuffle(buffer_size)
        self.valid_dataset = self.valid_dataset.repeat()
        # Batch the validation stream; note that the batch size used here
        # is the validation shuffle-buffer size computed above
        self.valid_dataset = self.valid_dataset.batch(buffer_size)
        self.valid_iterator = self.valid_dataset.make_initializable_iterator()
        self.valid_image = self.valid_iterator.get_next()

    # If the mode is "anomaly", create the labeled test dataset
    if self.config.data_loader.mode == "anomaly":
        self.test_filenames, self.test_labels = d.get_test_dataset()
        self.test_dataset = tf.data.Dataset.from_tensor_slices(
            (self.test_filenames, self.test_labels))
        self.test_dataset = self.test_dataset.map(
            map_func=self._parse_function_test,
            num_parallel_calls=self.config.data_loader.num_parallel_calls,
        )
        # Shuffle the dataset
        # self.test_dataset = self.test_dataset.shuffle(
        #     self.config.data_loader.buffer_size)
        # Repeat the dataset indefinitely
        self.test_dataset = self.test_dataset.repeat()
        # Apply batching
        self.test_dataset = self.test_dataset.batch(
            self.config.data_loader.test_batch)
        self.test_iterator = self.test_dataset.make_initializable_iterator()
        self.test_image, self.test_label = self.test_iterator.get_next()

    # In "visualization" mode the test set also carries ground-truth data
    if self.config.data_loader.mode == "visualization":
        self.test_filenames, self.test_labels, self.ground_truth = \
            d.get_test_dataset_vis()
        self.test_dataset = tf.data.Dataset.from_tensor_slices(
            (self.test_filenames, self.test_labels, self.ground_truth))
        self.test_dataset = self.test_dataset.map(
            map_func=self._parse_function_test_2,
            num_parallel_calls=self.config.data_loader.num_parallel_calls,
        )
        # Shuffle the dataset
        # self.test_dataset = self.test_dataset.shuffle(
        #     self.config.data_loader.buffer_size)
        # Repeat the dataset indefinitely
        self.test_dataset = self.test_dataset.repeat()
        # Apply batching
        self.test_dataset = self.test_dataset.batch(
            self.config.data_loader.test_batch)
        self.test_iterator = self.test_dataset.make_initializable_iterator()
        self.test_image, self.test_label, self.ground_truth = \
            self.test_iterator.get_next()

    # Same pipeline as "visualization", built from get_test_dataset_vis_big()
    if self.config.data_loader.mode == "visualization_big":
        self.test_filenames, self.test_labels, self.ground_truth = \
            d.get_test_dataset_vis_big()
        self.test_dataset = tf.data.Dataset.from_tensor_slices(
            (self.test_filenames, self.test_labels, self.ground_truth))
        self.test_dataset = self.test_dataset.map(
            map_func=self._parse_function_test_2,
            num_parallel_calls=self.config.data_loader.num_parallel_calls,
        )
        # Shuffle the dataset
        # self.test_dataset = self.test_dataset.shuffle(
        #     self.config.data_loader.buffer_size)
        # Repeat the dataset indefinitely
        self.test_dataset = self.test_dataset.repeat()
        # Apply batching
        self.test_dataset = self.test_dataset.batch(
            self.config.data_loader.test_batch)
        self.test_iterator = self.test_dataset.make_initializable_iterator()
        self.test_image, self.test_label, self.ground_truth = \
            self.test_iterator.get_next()
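# Self-contained sketch of the labeled test stream built in "anomaly" mode
# above, with dummy in-memory tensors standing in for the filenames and
# labels that d.get_test_dataset() would return; no real files are read, so
# the parse-function map is omitted here.
def _demo_anomaly_stream():
    test_filenames = tf.constant(["img_0.png", "img_1.png"])  # placeholders
    test_labels = tf.constant([0, 1])  # 0 = normal, 1 = anomalous (assumed)

    test_dataset = (tf.data.Dataset
                    .from_tensor_slices((test_filenames, test_labels))
                    .repeat()
                    .batch(2))

    test_iterator = test_dataset.make_initializable_iterator()
    next_name, next_label = test_iterator.get_next()

    with tf.Session() as sess:
        sess.run(test_iterator.initializer)
        # Each run yields the next (filenames, labels) batch; the real
        # pipeline maps a parse function first so images are decoded too.
        names, labels = sess.run([next_name, next_label])
    return names, labels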