def test_spectrogram_256():
    """Check the shape and value range of a uint8 spectrogram (n_fft=256).

    A sine tone is generated with ``Audio.load_tone``, converted to a
    spectrogram array and passed through ``to_uint8``. The test then checks
    the number of rows and columns and that every value lies within 0-255.
    """
    sample_rate = 250000
    n_fft = 256
    samples = sample_rate * 1

    tone = Audio.load_tone(20, samples, 'sine', sample_rate)
    spectrogram = to_uint8(tone.get_spectrogram_array(n_fft=n_fft))

    rows = spectrogram.shape[0]
    assert rows == (n_fft / 2) + 1, 'spectrogram height'

    columns = spectrogram.shape[1]
    assert columns == int(samples * 2 / n_fft) + 1, 'spectrogram width'

    # Walk every value of every row; a single out-of-range value fails.
    values_in_range = all(
        0 <= value <= 255 for row in spectrogram for value in row)
    assert values_in_range, f'range check {spectrogram} failed. Expected all values 0-255'
def test_mel_spectrogram_256():
    """Check the shape and value range of a uint8 mel spectrogram.

    A sine tone is generated with ``Audio.load_tone``, converted to a mel
    spectrogram (n_fft=256, hop_length=128, n_mels=60) and passed through
    ``to_uint8``. The test then checks that there is one row per mel band,
    the expected number of columns, and that every value lies within 0-255.
    """
    n_fft = 256
    hop_length = 128
    n_mels = 60
    sample_rate = 384000
    samples = sample_rate * 1

    spectrogram = Audio.load_tone(
        20, samples, 'sine', sample_rate).get_mel_spectrogram_array(
            n_fft=n_fft, n_mels=n_mels, hop_length=hop_length)
    spectrogram = to_uint8(spectrogram)

    assert spectrogram.shape[0] == n_mels, 'spectrogram height'

    # The width is tied to the hop length that was actually requested rather
    # than to n_fft. For these constants this is the same value (3001) as the
    # former `int(samples * 2 / n_fft) + 1`, which only held while
    # hop_length == n_fft / 2.
    expected_width = samples // hop_length + 1
    assert spectrogram.shape[1] == expected_width, 'spectrogram width'

    # Vectorised range check instead of a Python loop over every element.
    in_range = np.logical_and(spectrogram >= 0, spectrogram <= 255)
    assert in_range.all(), f'range check {spectrogram} failed. Expected all values 0-255'
def generate_jonnor_mnist(self,
                          preprocessed,
                          train_folds=(1, 2, 3, 4),
                          test_folds=(5,)):
    """Split preprocessed records into MNIST-style train and test lists.

    Args:
        preprocessed: Iterable of 7-tuples
            ``(fold, filename, target, category, take, spectrogram,
            settings)``. Only ``fold``, ``target`` and ``spectrogram`` are
            used here.
        train_folds: Fold values whose records go to the training lists.
        test_folds: Fold values whose records go to the test lists. A fold
            listed in both collections is treated as a training fold.

    Returns:
        A tuple ``(x_train, y_train, s_train, x_test, y_test, s_test)``
        where the ``x`` lists hold the flattened ``to_uint8`` output, the
        ``y`` lists hold the targets and the ``s`` lists hold the shape of
        each spectrogram before flattening.
    """
    x_train, y_train, s_train = [], [], []
    x_test, y_test, s_test = [], [], []

    for fold, _filename, target, _category, _take, spectrogram, _settings in preprocessed:
        if fold in train_folds:
            data, labels, shapes = x_train, y_train, s_train
        elif fold in test_folds:
            data, labels, shapes = x_test, y_test, s_test
        else:
            # Record belongs to neither split: skip it before doing the
            # conversion work.
            continue

        data.append(to_uint8(spectrogram).flatten())
        labels.append(target)
        shapes.append(spectrogram.shape)

    return x_train, y_train, s_train, x_test, y_test, s_test
def to_mnist(self,
             train_folds=(),
             test_folds=(),
             cache_path: str = None,
             n_fft: int = 1024,
             hop_length: int = None,
             flatten=True):
    """Build MNIST-style train/test lists of uint8 spectrograms.

    The layout follows ``tf.keras.datasets.mnist.load_data()``: the ``x``
    lists hold the image data (intended to be in the 0-255 range) and the
    ``y`` lists hold the labels (classes).

    Args:
        train_folds: Fold numbers whose files go to the training lists.
        test_folds: Fold numbers whose files go to the test lists.
        cache_path: Directory used for the pickle cache. ``None`` disables
            caching entirely.
        n_fft: Passed to ``get_spectrogram_array``.
        hop_length: Passed to ``get_spectrogram_array``; defaults to
            ``n_fft / 4`` when ``None``.
        flatten: When true each spectrogram is stored flattened, otherwise
            it is stored with its original shape.

    Returns:
        A tuple ``(x_train, y_train, s_train, x_test, y_test, s_test)``
        where the ``s`` lists hold the shape of each spectrogram before
        flattening.
    """
    # Setup caching. The path is only built when caching is enabled:
    # os.path.join() raises TypeError when given None.
    cache_filename = None
    if cache_path is not None:
        # NOTE: the file name is kept exactly as before so existing caches
        # stay valid: fold numbers are concatenated without a separator and
        # hop_length is recorded as passed in (i.e. 'None' when defaulted).
        cache_training_fold = "".join([str(x) for x in train_folds])
        cache_testing_fold = "".join([str(x) for x in test_folds])
        cache_file = (f'mnist_{n_fft}_{hop_length}_{int(flatten)}'
                      f'_{cache_training_fold}_{cache_testing_fold}.pkl')
        cache_filename = os.path.join(cache_path, cache_file)
        print(f'Caching: {cache_filename}')
        if os.path.exists(cache_filename):
            # NOTE(security): pickle.load executes arbitrary code from the
            # file; only point cache_path at a trusted directory.
            with open(cache_filename, 'rb') as f:
                return pickle.load(f)

    if hop_length is None:
        hop_length = int(n_fft / 4)

    all_records = list(
        zip(self.get_folds(), self.get_filenames(), self.get_targets()))

    x_train, y_train, s_train = [], [], []
    x_test, y_test, s_test = [], [], []

    splits = [(train_folds, x_train, y_train, s_train),
              (test_folds, x_test, y_test, s_test)]
    for folds, x, y, s in tqdm(splits, desc='Fold'):
        selected = [r for r in all_records if int(r[0]) in folds]
        for fold, filename, target in tqdm(selected, desc='File'):
            audio = self.get_audio(filename)
            if audio is None:
                # No audio available for this file: leave it out.
                continue

            # Get a Numpy array of the spectrogram
            spectrogram = audio.get_spectrogram_array(
                n_fft=n_fft, hop_length=hop_length, mode='dB')

            # Convert to the uint8 image representation
            spectrogram = to_uint8(spectrogram)

            # Flatten spectrogram if required and store
            x.append(spectrogram.flatten() if flatten else spectrogram)
            y.append(target)
            s.append(spectrogram.shape)

    result = (x_train, y_train, s_train, x_test, y_test, s_test)
    if cache_filename is not None:
        with open(cache_filename, 'wb') as f:
            pickle.dump(result, f)

    return result