    def __init__(self, config):
        """
        Args:
            config: configuration object for the current model
        """

        self.config = config
        # load data here
        d = DataLoader(self.config)
        # Get the filenames and labels
        self.filenames = d.get_train_dataset()
        # Create the dataset with the TensorFlow Data API
        self.dataset = tf.data.Dataset.from_tensor_slices(self.filenames)
        # Decode each filename into an image tensor via the parse function
        self.dataset = self.dataset.map(
            map_func=self._parse_function,
            num_parallel_calls=self.config.data_loader.num_parallel_calls,
        )
        # Shuffle the dataset
        # self.dataset = self.dataset.shuffle(self.config.data_loader.buffer_size)
        # Repeat the dataset for the configured number of epochs
        self.dataset = self.dataset.repeat(self.config.data_loader.num_epochs)
        # Apply batching
        self.dataset = self.dataset.batch(config.data_loader.batch_size)
        # Prefetch the next 10 batches to overlap input processing with training
        self.dataset = self.dataset.prefetch(buffer_size=10)
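A usage sketch, not part of the original example: pulling one batch from the dataset built above, using the same TF1-style session API as the snippets. The class name `ExampleDataGenerator` is a hypothetical stand-in for whatever class owns this `__init__`.

# Usage sketch (hypothetical class name; TF1-style session API)
data = ExampleDataGenerator(config)
iterator = data.dataset.make_one_shot_iterator()
next_batch = iterator.get_next()
with tf.Session() as sess:
    images = sess.run(next_batch)  # one batch of parsed images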
# Example 2
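Both examples read nested attributes such as `config.data_loader.batch_size`, but the config's exact shape is never shown. The skeleton below is an inferred sketch using `types.SimpleNamespace`, with placeholder values, covering every field the two snippets touch.

# Hypothetical config skeleton inferred from the attributes these
# examples access; all values are placeholders.
from types import SimpleNamespace

config = SimpleNamespace(
    data_loader=SimpleNamespace(
        batch_size=32,          # training batch size
        test_batch=16,          # test-time batch size
        buffer_size=1000,       # shuffle buffer size (in elements)
        num_parallel_calls=4,   # parallelism for dataset.map
        num_epochs=100,         # used by Example 1's repeat()
        validation=True,        # build a validation split (Example 2)
        validation_percent=20,  # share of the data held out for validation
        mode="anomaly",         # "anomaly" | "visualization" | "visualization_big"
    )
)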
    def __init__(self, config):
        """
        Args:
            config: configuration object for the current model
        """

        self.config = config
        log_object = Logger(self.config)
        self.logger = log_object.get_logger(__name__)
        # load data here
        d = DataLoader(self.config)
        self.logger.info("Loading data...")
        # Get the filenames and labels
        self.filenames_train = d.get_train_dataset()
        # Create the dataset with the TensorFlow Data API
        self.dataset = tf.data.Dataset.from_tensor_slices(self.filenames_train)
        # Decode each filename into an image tensor via the parse function
        self.dataset = self.dataset.map(
            map_func=self._parse_function,
            num_parallel_calls=self.config.data_loader.num_parallel_calls,
        )
        # Shuffle the dataset
        if self.config.data_loader.validation:
            buffer_size = int(
                self.config.data_loader.buffer_size *
                ((100 - self.config.data_loader.validation_percent) / 100))
        else:
            buffer_size = self.config.data_loader.buffer_size
        self.dataset = self.dataset.shuffle(buffer_size)
        # Repeat the dataset indefinitely
        self.dataset = self.dataset.repeat()
        # Apply batching
        self.dataset = self.dataset.batch(self.config.data_loader.batch_size)
        # Prefetch the next 10 batches to overlap input processing with training
        # (after batch(), prefetch counts batches, not elements)
        self.dataset = self.dataset.prefetch(buffer_size=10)
        self.iterator = self.dataset.make_initializable_iterator()
        self.image = self.iterator.get_next()
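        # The initializable iterator must be primed once per session with
        # sess.run(self.iterator.initializer) before self.image can be evaluated.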

        # Validation Dataset
        if self.config.data_loader.validation:
            self.filenames_valid = d.get_valid_dataset()
            # Create the Dataset using Tensorflow Data API
            self.valid_dataset = tf.data.Dataset.from_tensor_slices(
                self.filenames_valid)
            # Decode each filename into an image tensor via the parse function
            self.valid_dataset = self.valid_dataset.map(
                map_func=self._parse_function,
                num_parallel_calls=self.config.data_loader.num_parallel_calls,
            )
            buffer_size = int(
                (self.config.data_loader.buffer_size *
                 self.config.data_loader.validation_percent) / 100)
            self.valid_dataset = self.valid_dataset.shuffle(buffer_size)
            self.valid_dataset = self.valid_dataset.repeat()
            # Apply batching; the batch size equals the shuffle buffer size here,
            # so each validation batch spans the whole buffered split
            self.valid_dataset = self.valid_dataset.batch(buffer_size)
            self.valid_iterator = self.valid_dataset.make_initializable_iterator()
            self.valid_image = self.valid_iterator.get_next()

        # If the mode is "anomaly", create the test dataset
        if self.config.data_loader.mode == "anomaly":
            self.test_filenames, self.test_labels = d.get_test_dataset()
            self.test_dataset = tf.data.Dataset.from_tensor_slices(
                (self.test_filenames, self.test_labels))
            self.test_dataset = self.test_dataset.map(
                map_func=self._parse_function_test,
                num_parallel_calls=self.config.data_loader.num_parallel_calls,
            )
            # Shuffle the dataset
            # self.test_dataset = self.test_dataset.shuffle(self.config.data_loader.buffer_size)
            # Repeat the dataset indefinitely
            self.test_dataset = self.test_dataset.repeat()
            # Apply batching
            self.test_dataset = self.test_dataset.batch(
                self.config.data_loader.test_batch)
            self.test_iterator = self.test_dataset.make_initializable_iterator()
            self.test_image, self.test_label = self.test_iterator.get_next()
        if self.config.data_loader.mode == "visualization":
            self.test_filenames, self.test_labels, self.ground_truth = d.get_test_dataset_vis(
            )
            self.test_dataset = tf.data.Dataset.from_tensor_slices(
                (self.test_filenames, self.test_labels, self.ground_truth))
            self.test_dataset = self.test_dataset.map(
                map_func=self._parse_function_test_2,
                num_parallel_calls=self.config.data_loader.num_parallel_calls,
            )
            # Shuffle the dataset
            # self.test_dataset = self.test_dataset.shuffle(self.config.data_loader.buffer_size)
            # Repeat the dataset indefinitely
            self.test_dataset = self.test_dataset.repeat()
            # Apply batching
            self.test_dataset = self.test_dataset.batch(
                self.config.data_loader.test_batch)
            self.test_iterator = self.test_dataset.make_initializable_iterator()
            # Note: this rebinds self.ground_truth from the filename list
            # to the batched ground-truth tensor
            self.test_image, self.test_label, self.ground_truth = self.test_iterator.get_next()
        if self.config.data_loader.mode == "visualization_big":
            self.test_filenames, self.test_labels, self.ground_truth = d.get_test_dataset_vis_big(
            )
            self.test_dataset = tf.data.Dataset.from_tensor_slices(
                (self.test_filenames, self.test_labels, self.ground_truth))
            self.test_dataset = self.test_dataset.map(
                map_func=self._parse_function_test_2,
                num_parallel_calls=self.config.data_loader.num_parallel_calls,
            )
            # Shuffle the dataset
            # self.test_dataset = self.test_dataset.shuffle(self.config.data_loader.buffer_size)
            # Repeat the dataset indefinitely
            self.test_dataset = self.test_dataset.repeat()
            # Apply batching
            self.test_dataset = self.test_dataset.batch(
                self.config.data_loader.test_batch)
            self.test_iterator = self.test_dataset.make_initializable_iterator()
            self.test_image, self.test_label, self.ground_truth = self.test_iterator.get_next()
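The `_parse_function` helpers these pipelines map over are not shown anywhere in the snippets. Below is a minimal sketch assuming the filenames point to single-channel PNGs and that the config carries an `image_size` field; both the image format and that field are assumptions, not part of the original code.

    # Hypothetical parse helpers; PNG input and config.data_loader.image_size
    # are assumptions not present in the original snippet.
    def _parse_function(self, filename):
        image_string = tf.read_file(filename)
        image = tf.image.decode_png(image_string, channels=1)
        image = tf.image.convert_image_dtype(image, tf.float32)  # scale to [0, 1]
        size = self.config.data_loader.image_size
        return tf.image.resize_images(image, [size, size])

    def _parse_function_test(self, filename, label):
        # Same decoding; the label passes through unchanged.
        return self._parse_function(filename), label

    def _parse_function_test_2(self, filename, label, ground_truth_file):
        # Visualization modes also decode a ground-truth image the same way.
        return self._parse_function(filename), label, self._parse_function(ground_truth_file)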