def extract_mel_spectrogram(wav_path, X, y, index, curr_speaker_num):
    """
    Extracts the mel spectrogram into the X array and saves the speaker into y.

    The spectrogram is written to ``X[index, 0, :, :]``, starting at row 0 and
    column 0. If the spectrogram has more columns than the last axis of X, the
    surplus columns are dropped instead of raising an error.

    :param wav_path: the path to the wav file
    :param X: return Array that saves the mel spectrogram
    :param y: return Array that saves the speaker numbers
    :param index: the index in X and y this is stored in
    :param curr_speaker_num: the speaker number of the current speaker
    :return: a one (1) to increase the index
    :raises IndexError: if the spectrogram has more rows than X.shape[2]
    """
    Sxx = spectrogram_converter.mel_spectrogram(wav_path)
    n_rows, n_cols = Sxx.shape[0], Sxx.shape[1]

    # A row-count mismatch is a configuration error, so it stays loud rather
    # than being silently truncated.
    if n_rows > X.shape[2]:
        raise IndexError(
            'spectrogram has %d rows but X only holds %d' % (n_rows, X.shape[2]))

    # Only the column (last) axis is cut off when the spectrogram is wider
    # than X.
    n_cols = min(n_cols, X.shape[3])

    # One block copy instead of a Python-level loop over every element.
    # NOTE(review): assumes Sxx and X are NumPy-style arrays that support
    # 2-D slicing and slice assignment - confirm against mel_spectrogram().
    X[index, 0, :n_rows, :n_cols] = Sxx[:n_rows, :n_cols]
    y[index] = curr_speaker_num
    return 1
def extract_mel_spectrogram(wav_path, X, y, index, speaker_uid):
    """
    Extracts the mel spectrogram into the X array and saves the speaker into y.

    The spectrogram is written to ``X[index, 0, :, :]``, starting at row 0 and
    column 0. Columns beyond the last axis of X are dropped.

    :param wav_path: the path to the wav file
    :param X: return Array that saves the mel spectrogram
    :param y: return Array that saves the speaker numbers
    :param index: the index in X and y this is stored in
    :param speaker_uid: the speaker number of the current speaker (integer hash of his identifier)
    :return: a one (1) to increase the index
    :raises IndexError: if the spectrogram has more rows than X.shape[2]
    """
    Sxx = spectrogram_converter.mel_spectrogram(wav_path)
    n_rows = Sxx.shape[0]

    # Rows were never truncated; keep a row-count mismatch as an IndexError.
    if n_rows > X.shape[2]:
        raise IndexError(
            'spectrogram has %d rows but X only holds %d' % (n_rows, X.shape[2]))

    # In case the file is longer than the :max_audio_length defined in the speaker_factory.py,
    # we only use the spectrogram up to that cut off point at :max_audio_length.
    # Clamping the column count once avoids visiting every surplus column.
    n_cols = min(Sxx.shape[1], X.shape[3])

    # One block copy instead of a Python-level loop over every element.
    # NOTE(review): assumes Sxx and X are NumPy-style arrays that support
    # 2-D slicing and slice assignment - confirm against mel_spectrogram().
    X[index, 0, :n_rows, :n_cols] = Sxx[:n_rows, :n_cols]
    y[index] = speaker_uid

    # The documented contract is to return 1 so callers can advance their index.
    return 1
def save_spectrogramm_png(path):
    """
    Plots the mel spectrogram of an audio file and saves the plot to disc.

    The target file is whatever get_result_png('spectrogram') resolves to.

    :param path: handed unchanged to mel_spectrogram()
    """
    mel = mel_spectrogram(path)

    # NOTE(review): figure number 1 is reused on every call and is not cleared
    # or closed here - confirm that repeated calls behave as intended.
    fig = plot.figure(1)

    # Only the columns 20..159 of the spectrogram are drawn.
    plot.imshow(mel[:, 20:160])

    # Colour bar with eleven fixed labels, evenly spaced from 0 to 35.
    # NOTE(review): the labels are hard-coded and not derived from the data.
    bar = plot.colorbar()
    tick_labels = [str(level) + ' dB' for level in np.linspace(0, 35, num=11)]
    bar.ax.set_yticklabels(tick_labels)

    # Axis captions (runtime strings, kept exactly as they are).
    plot.xlabel('Spektra (in Zeit)')
    plot.ylabel('Frequenz-Datenpunkte')

    # Write the finished figure to disc.
    fig.savefig(get_result_png('spectrogram'))