def initialize_dataset(self, tf_record, batch_size, shuffle_buffer=512):
    """
    Initialize and prepare a TFRecord dataset for training.

    Args:
        tf_record: TFRecord file.
        batch_size: int, training batch size.
        shuffle_buffer: int, buffer size for shuffling the dataset.

    Returns:
        tf.data.Dataset yielding batched (images, targets) pairs,
        shuffled and prefetched for training.
    """
    dataset = read_tfr(
        tf_record, self.classes_file, get_feature_map(), self.max_boxes
    )
    dataset = dataset.shuffle(shuffle_buffer)
    dataset = dataset.batch(batch_size)
    # Parallelize the per-batch (image, boxes) -> (model input, model
    # targets) transformation; AUTOTUNE matches the prefetch policy below.
    dataset = dataset.map(
        lambda x, y: (
            transform_images(x, self.input_shape[0]),
            transform_targets(
                y, self.anchors, self.masks, self.input_shape[0]
            ),
        ),
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset
def make_predictions(
    self,
    trained_weights,
    merge=False,
    workers=16,
    shuffle_buffer=512,
    batch_size=64,
):
    """
    Make predictions on both training and validation data sets and save
    results as csv in Output folder.

    Args:
        trained_weights: Trained .tf weights or .weights file
            (in case self.classes = 80).
        merge: If True a single file will be saved for training
            and validation sets predictions combined.
        workers: Parallel predictions.
        shuffle_buffer: int, shuffle dataset buffer size.
        batch_size: Prediction batch size.

    Returns:
        1 combined pandas DataFrame for entire dataset predictions
        or 2 pandas DataFrame(s) for training and validation
        data sets respectively.
    """
    self.create_models()
    self.load_weights(trained_weights)
    features = get_feature_map()
    train_dataset = read_tfr(
        self.train_tf_record,
        self.classes_file,
        features,
        self.max_boxes,
        get_features=True,
    )
    valid_dataset = read_tfr(
        self.valid_tf_record,
        self.classes_file,
        features,
        self.max_boxes,
        get_features=True,
    )
    # tf.data datasets are immutable: shuffle() returns a new dataset and
    # does not modify in place, so the result must be reassigned (the
    # original unassigned calls were no-ops and nothing was shuffled).
    train_dataset = train_dataset.shuffle(shuffle_buffer)
    valid_dataset = valid_dataset.shuffle(shuffle_buffer)
    train_dataset = iter(train_dataset)
    valid_dataset = iter(valid_dataset)
    train_predictions = self.predict_dataset(
        train_dataset, workers, 'train', batch_size
    )
    valid_predictions = self.predict_dataset(
        valid_dataset, workers, 'valid', batch_size
    )
    if merge:
        predictions = pd.concat([train_predictions, valid_predictions])
        save_path = os.path.join(
            '..', 'Output', 'Data', 'full_dataset_predictions.csv'
        )
        predictions.to_csv(save_path, index=False)
        return predictions
    train_path = os.path.join(
        '..', 'Output', 'Data', 'train_dataset_predictions.csv'
    )
    valid_path = os.path.join(
        '..', 'Output', 'Data', 'valid_dataset_predictions.csv'
    )
    train_predictions.to_csv(train_path, index=False)
    valid_predictions.to_csv(valid_path, index=False)
    return train_predictions, valid_predictions