Esempio n. 1
0
    def initialize_dataset(self, tf_record, batch_size, shuffle_buffer=512):
        """
        Build a training-ready tf.data pipeline from a TFRecord file.

        The pipeline shuffles, batches, applies image/target transforms,
        and prefetches with AUTOTUNE.

        Args:
            tf_record: TFRecord file.
            batch_size: int, training batch size.
            shuffle_buffer: Buffer size for shuffling dataset.

        Returns:
            The prepared tf.data.Dataset.
        """
        input_size = self.input_shape[0]

        def _transform(images, labels):
            # Per-batch preprocessing: resize/scale images and encode targets
            # for each output scale defined by anchors/masks.
            return (
                transform_images(images, input_size),
                transform_targets(labels, self.anchors, self.masks,
                                  input_size),
            )

        return (
            read_tfr(tf_record, self.classes_file, get_feature_map(),
                     self.max_boxes)
            .shuffle(shuffle_buffer)
            .batch(batch_size)
            .map(_transform)
            .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
        )
Esempio n. 2
0
    def make_predictions(
        self,
        trained_weights,
        merge=False,
        workers=16,
        shuffle_buffer=512,
        batch_size=64,
    ):
        """
        Make predictions on both training and validation data sets
            and save results as csv in output folder.
        Args:
            trained_weights: Trained .tf weights or .weights file
                (in case self.classes = 80).
            merge: If True a single file will be saved for training
                and validation sets predictions combined.
            workers: Parallel predictions.
            shuffle_buffer: int, shuffle dataset buffer size.
            batch_size: Prediction batch size.

        Returns:
            1 combined pandas DataFrame for entire dataset predictions
                or 2 pandas DataFrame(s) for training and validation
                data sets respectively.
        """
        self.create_models(
            reverse_v4=trained_weights.endswith('tf'))
        self.load_weights(trained_weights)
        features = get_feature_map()
        train_dataset = read_tfr(
            self.train_tf_record,
            self.classes_file,
            features,
            self.max_boxes,
            get_features=True,
        )
        valid_dataset = read_tfr(
            self.valid_tf_record,
            self.classes_file,
            features,
            self.max_boxes,
            get_features=True,
        )
        # tf.data datasets are immutable: shuffle() returns a new dataset,
        # so the result must be reassigned or the shuffle is a no-op.
        train_dataset = train_dataset.shuffle(shuffle_buffer)
        valid_dataset = valid_dataset.shuffle(shuffle_buffer)
        train_dataset = iter(train_dataset)
        valid_dataset = iter(valid_dataset)
        train_predictions = self.predict_dataset(train_dataset, workers,
                                                 'train', batch_size)
        valid_predictions = self.predict_dataset(valid_dataset, workers,
                                                 'valid', batch_size)
        if merge:
            # Single combined CSV for the full dataset.
            predictions = pd.concat([train_predictions, valid_predictions])
            save_path = get_abs_path('output',
                                     'data',
                                     'full_dataset_predictions.csv',
                                     create_parents=True)
            predictions.to_csv(save_path, index=False)
            return predictions
        # Separate CSVs for train and validation predictions.
        train_path = get_abs_path('output',
                                  'data',
                                  'train_dataset_predictions.csv',
                                  create_parents=True)
        valid_path = get_abs_path('output',
                                  'data',
                                  'valid_dataset_predictions.csv',
                                  create_parents=True)
        train_predictions.to_csv(train_path, index=False)
        valid_predictions.to_csv(valid_path, index=False)
        return train_predictions, valid_predictions