def show_spectogram(data):
    """Draw ``data`` with ``display.specshow`` and show the figure.

    The plot is placed in the first cell of a 1x2 subplot grid and titled
    'log-Spectrogram by Librosa'.

    Args:
        data: 2-D array handed unchanged to ``display.specshow``.
    """
    from matplotlib import pyplot as plt

    # First (left) cell of a one-row, two-column grid.
    plt.subplot(1, 2, 1)
    display.specshow(data)
    plt.title('log-Spectrogram by Librosa')
    plt.show()
def plot_spec(signal, sr=16000, win_length=None, hop_length=None,
              return_spec=False):
    """Plot the dB-scaled STFT magnitude of ``signal`` on the current axes.

    Args:
        signal: Audio samples passed to ``librosa.core.stft``.
        sr: Sampling rate in Hz; used for the default window/hop sizes and
            for the axis scaling.
        win_length: STFT window length in samples. Defaults to 25 ms
            (``int(sr * 0.025)``).
        hop_length: STFT hop in samples. Defaults to 10 ms
            (``int(sr * 0.010)``).
        return_spec: When true, return the dB spectrogram that was plotted.

    Returns:
        The dB spectrogram if ``return_spec`` is true, otherwise ``None``.
    """
    if win_length is None:
        win_length = int(sr * 0.025)
    if hop_length is None:
        hop_length = int(sr * 0.010)

    stft_matrix = librosa.core.stft(
        signal,
        win_length=win_length,
        hop_length=hop_length,
        n_fft=4096,
    )
    spec = librosa.amplitude_to_db(np.abs(stft_matrix), ref=np.max)

    # The hop used for the STFT must also be given to specshow; otherwise the
    # time axis is computed from specshow's own default hop length.
    specshow(
        spec,
        sr=sr,
        hop_length=hop_length,
        x_axis='time',
        y_axis='hz',
        cmap='gray_r',
    )
    plt.colorbar(format='%+2.0f dB')

    if return_spec:
        return spec
    return None
def plot_mfcc(mfccs):
    """Render ``mfccs`` in a new 10x4 inch figure titled 'MFCC' and show it.

    Args:
        mfccs: 2-D array handed to ``display.specshow`` with a time x-axis.
    """
    figure_size = (10, 4)
    plt.figure(figsize=figure_size)

    display.specshow(mfccs, x_axis='time')
    plt.colorbar()
    plt.title('MFCC')

    plt.tight_layout()
    plt.show()
def plot_chroma(chromagram, name="untitled", sr=44100, hl=2048,
                output_dir="/Users/angel/Dropbox/Apps/Texpad/Thesis/figures",
                cmap='Reds', save=False):
    """Plot a chromagram with pitch-class tick labels and optionally save it.

    Args:
        chromagram: 2-D array whose first dimension is the number of chroma
            bins. 12 and 36 bins get a dedicated figure size; any other bin
            count is drawn on the current figure.
        name: Base name of the saved file (``name + '.pdf'``).
        sr: Sampling rate forwarded to ``specshow``.
        hl: Hop length forwarded to ``specshow``.
        output_dir: Directory the figure is written to when ``save`` is true.
        cmap: Colormap forwarded to ``specshow``.
        save: When true, write the figure before showing it.
    """
    natural_positions = (0.5, 2.5, 4.5, 5.5, 7.5, 9.5, 11.5)
    natural_labels = ('c', 'd', 'e', 'f', 'g', 'a', 'b')
    chromatic_positions = (0.5, 3.5, 6.5, 9.5, 12.5, 15.5, 18.5, 21.5, 24.5,
                           27.5, 30.5, 33.5)
    chromatic_labels = ('c', r'c$\sharp$', 'd', r'e$\flat$', 'e', 'f',
                        r'f$\sharp$', 'g', r'a$\flat$', 'a', r'b$\flat$', 'b')

    n_bins = chromagram.shape[0]
    if n_bins == 12:
        plt.figure(figsize=(5.16, 2), dpi=150)
    elif n_bins == 36:
        plt.figure(figsize=(5.16, 2.5), dpi=150)

    specshow(chromagram, x_axis='time', sr=sr, hop_length=hl, cmap=cmap)
    plt.xlabel('time (secs.)')
    plt.ylabel('chroma')

    # Ticks are set once, after drawing. The 36-bin layout keeps its own
    # labelling instead of being overwritten by the 12-bin ticks; every other
    # bin count keeps the 12-bin ticks as before.
    if n_bins == 36:
        plt.yticks(chromatic_positions, chromatic_labels)
    else:
        plt.yticks(natural_positions, natural_labels)

    plt.tight_layout()
    if save:
        # NOTE(review): the file name always ends in '.pdf' while the actual
        # format comes from FORMAT - confirm the two agree.
        plt.savefig(os.path.join(output_dir, name + '.pdf'),
                    format=FORMAT,
                    dpi=PRINT_QUALITY,
                    transparent=TRANSPARENT)
    plt.show()
def plot_speclike(  # pylint: disable=too-many-arguments
        orderedlist,
        figsize=(20, 4),
        show_time=False,
        sr=16000,
        hop_sec=0.05,
        cmap='viridis',
        show=True):
    """Stack equally long feature rows and draw them like a spectrogram.

    The items are stacked in reverse order before plotting.

    Args:
        orderedlist: Sequence of arrays whose first dimension must match.
        figsize: Size of the new figure in inches.
        show_time: When true, label the x-axis in time units.
        sr: Sampling rate forwarded to ``specshow``.
        hop_sec: Hop between frames in seconds; converted to samples
            with ``sr``.
        cmap: Colormap forwarded to ``specshow``.
        show: When true, call ``plt.show()`` after drawing.

    Raises:
        AssertionError: If the items do not share the same first dimension.
    """
    assert all(
        o.shape[0] == orderedlist[0].shape[0]
        for o in orderedlist), "All list items should be of the same length"

    x_axis = 'time' if show_time else None
    hop_len = int(hop_sec * sr)

    # np.vstack needs a real sequence; a bare ``reversed`` iterator is
    # deprecated input and is rejected by newer NumPy releases.
    stacked = np.vstack(list(reversed(orderedlist)))

    plt.figure(figsize=figsize)
    specshow(
        stacked,
        x_axis=x_axis,
        sr=sr,
        hop_length=hop_len,
        cmap=cmap,
    )
    plt.colorbar()
    if show:
        plt.show()
def display_melspectrogram(y, sr, title):
    """Plot the dB-scaled mel spectrogram of ``y`` in a new 10x4 inch figure.

    Args:
        y: Audio samples passed to ``audio_to_mel``.
        sr: Sampling rate of ``y``; passed to ``audio_to_mel`` and to the
            plot so the time axis uses the signal's rate.
        title: Figure title.
    """
    S = audio_to_mel(y, sr)
    plt.figure(figsize=(10, 4))
    # ``sr`` was previously not forwarded, so the time axis silently used
    # specshow's default rate.
    # NOTE(review): the hop length used inside audio_to_mel is not visible
    # here; if it differs from specshow's default it should be passed as well.
    display.specshow(librosa.power_to_db(S, ref=np.max),
                     sr=sr,
                     y_axis='mel',
                     fmax=8000,
                     x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
def visualize_cqts(self, filename, start_width, perm_map, display=True,
                   figsize=(10, 5)):
    """Show a spectrogram excerpt next to its jigsaw-permuted version.

    The array stored at ``self.root_dir + filename`` is loaded, a window of
    ``self.num_seconds * int(1280 / 30)`` columns starting at ``start_width``
    is cut out, split into three equal parts along the column axis, and the
    parts are re-assembled in the order given by ``perm_map`` with the last
    five columns of every part dropped.

    Args:
        filename: File name appended to ``self.root_dir``.
        start_width: First column of the excerpt.
        perm_map: Iterable of part indices (0-2) giving the jigsaw order.
        display: When true, draw both versions side by side and show them.
            When false, no figure is created.
        figsize: Figure size in inches.

    Returns:
        None.
    """
    logscalogram = np.load(self.root_dir + filename)
    second_width = int(1280 / 30)
    desired_width = self.num_seconds * second_width
    chosen_split = logscalogram[:, start_width:start_width + desired_width]
    height = logscalogram.shape[0]

    jigsaw_splits = np.split(chosen_split, 3, axis=1)
    final_jigsaw = np.concatenate(
        [jigsaw_splits[x][:, :-5] for x in perm_map], axis=1)

    if not display:
        # Nothing to draw: do not allocate a figure that would never be
        # shown or closed.
        return

    fig, ax = plt.subplots(1, 2, sharex='col', sharey='row', figsize=figsize)

    ax[0].set_title('ORIGINAL SPECGRAM')
    specshow(chosen_split, ax=ax[0])

    ax[1].set_title('FULL JIGSAW SPECGRAM')
    # NOTE(review): the markers sit at thirds of the untrimmed width although
    # every piece lost five columns - confirm this offset is intended.
    jigsaw_xs = (desired_width * np.array([1, 2, 3]) / 3).astype('int')
    for jigsaw_x in jigsaw_xs:
        ax[1].plot([jigsaw_x, jigsaw_x], [0, height], '--', color='w',
                   linewidth=2.0)
    specshow(final_jigsaw, ax=ax[1])
    plt.show()
    return
def plot_db(path):
    """Save a log-frequency dB spectrogram of the audio file at ``path``.

    The audio is loaded at ``hp.rate`` and the figure is written next to the
    input as ``path + '_fig.png'``. The figure is left open, as before.

    Args:
        path: Path of the audio file to load.
    """
    test_wav, sr = librosa.load(path, sr=hp.rate)
    D = librosa.amplitude_to_db(np.abs(librosa.stft(test_wav)), ref=np.max)

    plt.figure(figsize=(12, 8))
    # Use the rate the audio was actually loaded at; without it the axes are
    # scaled for specshow's default sampling rate.
    display.specshow(D, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.savefig(path + '_fig.png')
def audio_processor(item):
    """Render a spectrogram thumbnail for ``item`` as an in-memory PNG.

    The figure size scales with the clip: ``INCHES_PER_SECOND`` per second of
    audio horizontally and ``INCHES_PER_KHZ`` per kHz up to the Nyquist
    frequency vertically.

    Args:
        item: Object whose ``item_file`` attribute is loaded with
            ``librosa.core.load``.

    Returns:
        An ``InMemoryUploadedFile`` named 'thumbnail.png' wrapping the PNG.
    """
    sig, sr = librosa.core.load(item.item_file)
    duration = len(sig) / sr
    nyquist = sr / 2
    # int() truncation can yield 0 for very short clips; a figure needs a
    # positive size, so clamp each side to at least one inch.
    figsize = (
        max(1, int(duration * INCHES_PER_SECOND)),
        max(1, int((nyquist / 1000) * INCHES_PER_KHZ)),
    )

    spec = np.abs(librosa.core.stft(sig))
    scaled_spec = librosa.amplitude_to_db(spec)

    tmp_io = io.BytesIO()
    with plt.style.context('dark_background'):
        fig = plt.figure(figsize=figsize)
        try:
            specshow(scaled_spec, sr=sr, cmap='gray', y_axis='linear',
                     x_axis='time')
            plt.colorbar(format='%+2.0f dB', aspect=40)
            plt.tight_layout()
            plt.savefig(tmp_io, format='png', facecolor='black',
                        bbox_inches='tight')
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this each processed item leaves one figure behind.
            plt.close(fig)

    im_file = InMemoryUploadedFile(
        tmp_io, None, 'thumbnail.png', 'image/png',
        tmp_io.getbuffer().nbytes, None)
    return im_file
def readFile(filepath):
    """Load an audio file and return its normalised STFT magnitude, padded.

    The magnitude spectrogram is normalised with ``librosa.util.normalize``,
    drawn on the current axes as a side effect, and zero-padded on the right
    to exactly 315 frames.

    Args:
        filepath: Path of the audio file to load.

    Returns:
        2-D array with 315 columns.

    Raises:
        ValueError: If the spectrogram has more than 315 frames.
    """
    target_frames = 315

    y, _sr = librosa.load(filepath)
    # np.abs on the complex STFT is the magnitude sqrt(re**2 + im**2).
    magnitude = np.abs(librosa.stft(y))
    norm = librosa.util.normalize(magnitude)
    display.specshow(norm, y_axis='log', x_axis='time')

    n_frames = norm.shape[1]
    if n_frames > target_frames:
        # np.pad would reject the negative width with an opaque message;
        # fail with the same exception type but say what is wrong.
        raise ValueError(
            "spectrogram of '{}' has {} frames; at most {} are supported"
            .format(filepath, n_frames, target_frames))

    result = np.pad(norm, [(0, 0), (0, target_frames - n_frames)], 'constant')
    return result
def plot_spectrum_pred(rec_spec, true_spec, path, sr, hop_length):
    """Save a side-by-side plot of a reconstructed and a reference spectrum.

    Both inputs have their first two axes swapped and are squeezed before
    being converted to dB and drawn on a shared pair of axes. The figure is
    written to ``path`` and then closed.

    Args:
        rec_spec: Reconstructed spectrum.
        true_spec: Ground-truth spectrum.
        path: Destination passed to ``plt.savefig``.
        sr: Sampling rate forwarded to ``specshow``.
        hop_length: Hop length; ``hop_length * 0.75`` is forwarded to
            ``specshow``.
    """
    rec_spec = tf.squeeze(np.swapaxes(rec_spec, 0, 1))
    true_spec = tf.squeeze(np.swapaxes(true_spec, 0, 1))

    _, (left_ax, right_ax) = plt.subplots(ncols=2, sharex=True, sharey=True)
    plt.xlabel("Frequency")
    plt.ylabel("Time in sample")

    panels = (
        (left_ax, "reconstruction", rec_spec),
        (right_ax, "ground truth", true_spec),
    )
    for axis, heading, spectrum in panels:
        axis.set_title(heading)
        specshow(librosa.amplitude_to_db(spectrum, ref=np.max),
                 ax=axis,
                 sr=sr,
                 hop_length=hop_length * 0.75,
                 y_axis='linear',
                 x_axis='s')

    plt.savefig(path)
    plt.close()
def plot_fourier_transformation(sfreq: int, audio: np.ndarray,
                                spec_db: np.ndarray) -> None:
    """Show the raw waveform above its spectrogram on a shared x-axis.

    Args:
        sfreq: Sampling frequency, used to convert sample indices to seconds
            and forwarded to ``specshow``.
        audio: Raw audio samples drawn in the upper panel.
        spec_db: Spectrogram drawn with a hop length of 16 samples.
    """
    sample_times = np.arange(len(audio)) / sfreq

    _, panels = plt.subplots(nrows=2, ncols=1, figsize=(10, 10), sharex=True)
    panels[0].plot(sample_times, audio)

    # No ``ax`` is given, so specshow draws on pyplot's current axes
    # (presumably the lower panel created last - verify if the layout changes).
    specshow(spec_db, sr=sfreq, x_axis='time', y_axis='hz', hop_length=16)
    plt.show()
def createMelSpectrogram(input_path, fileName, output_path, saveOrShow=0):
    """Compute a mel spectrogram for one audio file and save or show it.

    At most the first 10 seconds of the file are loaded, resampled to 16 kHz.

    Args:
        input_path: Directory containing the audio file.
        fileName: Name of the audio file inside ``input_path``.
        output_path: Directory the PNG is written to when saving.
        saveOrShow: ``0`` saves ``<stem>.png`` (inferno colormap) to
            ``output_path``; any other value shows the spectrogram instead.
    """
    # load sound signal
    signal, sr = librosa.load(os.path.join(input_path, fileName),
                              duration=10, sr=16000)

    # create Mel Spectrogram
    mel_layer = Melspectrogram(n_dft=1024,
                               n_hop=320,
                               input_shape=(1, signal.shape[0]),
                               padding='same',
                               sr=sr,
                               n_mels=224,
                               fmin=1400,
                               fmax=sr / 2,
                               power_melgram=2.0,
                               return_decibel_melgram=True,
                               trainable_fb=False,
                               trainable_kernel=False)
    S = mel_layer(signal.reshape(1, 1, -1)).numpy()
    S = S.reshape(S.shape[1], S.shape[2])
    print(S.shape)

    if saveOrShow == 0:
        # splitext strips only the final extension, so 'clip.v2.wav' becomes
        # 'clip.v2.png' instead of colliding with 'clip.v1.wav' on 'clip.png'.
        stem = os.path.splitext(fileName)[0]
        matplotlib.image.imsave(os.path.join(output_path, stem + ".png"),
                                S, cmap='inferno')
    else:
        display.specshow(S, sr=sr)
        plt.show()
def visualize_seam(self, trans, seam):
    """Render a computed seam on top of the transition spectrogram as SVG.

    For every row of ``seam`` the first truthy entry marks the seam position;
    a band around that column is set to 0 dB in the spectrogram so that it
    stands out. The STFT of the audio must have the same dimensions as the
    seam.

    :param trans: transition audio passed to ``self.stft``
    :param seam: seam matrix, one row per spectrogram row
    :return: ``io.BytesIO`` holding the SVG, positioned at its start
    """
    yft = lr.amplitude_to_db(np.abs(self.stft(trans)), ref=np.max)
    # At least one column on each side, otherwise narrow spectrograms
    # (rounding to 0) would show no seam at all.
    line_width = max(1, round(yft.shape[1] / 150))

    for ri, row in enumerate(seam):
        ci = next((i for i, v in enumerate(row) if v), None)
        if ci is None:
            # Row without a seam entry: nothing to highlight.
            continue
        first = max(ci - line_width, 0)
        last = min(ci + line_width, len(row))
        yft[ri, first:last] = 0

    output = io.BytesIO()
    fig = plt.figure(figsize=(10, 4))
    try:
        display.specshow(yft, y_axis='log', x_axis='time',
                         hop_length=self.parameters['n_fft'] / 8)
        plt.savefig(output, format='svg')
    finally:
        # The figure only exists to produce the SVG; release it.
        plt.close(fig)

    # Rewind so callers can read() the buffer directly.
    output.seek(0)
    return output
def test(sample, fs, low=10000, high=25000, order=9):
    """Band-pass filter ``sample`` and show its dB spectrogram.

    Args:
        sample: Audio samples.
        fs: Sampling rate, used by the filter and for the axis scaling.
        low: Lower cut-off frequency passed to ``band_pass``.
        high: Upper cut-off frequency passed to ``band_pass``.
        order: Filter order passed to ``band_pass``.
    """
    sample = band_pass(sample, fs=fs, low=low, high=high, order=order)
    D = librosa.amplitude_to_db(np.abs(librosa.stft(sample)), ref=np.max)
    dis.specshow(D, y_axis='linear', sr=fs, x_axis='s')
    plt.colorbar(format='%+2.0f dB')
    # The frequency axis is linear, so the title must not claim a log axis.
    plt.title('Linear-frequency power spectrogram')
    plt.show()
def plotSpec(self, y, sr):
    """Draw the dB spectrogram of ``y`` on the current axes.

    A spectrogram is a visual representation of the spectrum of frequencies
    of a sound or other signal as it varies with time or another variable.

    Args:
        y: Audio samples.
        sr: Sampling rate; also used as the FFT size.
    """
    import numpy as np

    print('Iscrtavanje spektograma snage', end="")
    # Same hop that stft would pick by default for this n_fft (a quarter of
    # the window), made explicit so the plot can use the identical value.
    hop_length = sr // 4
    # Returns a matrix of complex values.
    yD = librosa.stft(y, n_fft=sr, hop_length=hop_length)
    # amplitude_to_db expects magnitudes, not complex values; sr and
    # hop_length keep the plotted axes consistent with the analysis.
    disp.specshow(librosa.amplitude_to_db(np.abs(yD)),
                  sr=sr,
                  hop_length=hop_length,
                  y_axis='log',
                  x_axis='time')
    print('Završeno.')
    return
def plot_mels(specs, fs=16000):
    """Draw ``specs`` as a mel-frequency spectrogram on the current axes.

    Args:
        specs: 2-D array handed to ``specshow``.
        fs: Sampling rate; the mel axis is capped at ``fs / 2``.
    """
    nyquist = fs / 2
    specshow(specs, sr=fs, fmax=nyquist, x_axis='time', y_axis='mel')

    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel-frequency spectrogram')
    plt.tight_layout()
def spectrogram(stft, window_size, overlap, fs, y='linear',
                freq_subset: tuple = None, c_bar=None):
    """Draw ``stft`` with ``display.specshow`` and return the axes.

    Args:
        stft: 2-D array to display.
        window_size: Analysis window size in samples.
        overlap: Fractional overlap between windows; the hop is
            ``window_size * (1 - overlap)``.
        fs: Sampling rate.
        y: Y-axis type forwarded to ``specshow``.
        freq_subset: Optional ``(low, high)`` pair in Hz. When given, the
            y tick labels are remapped to this range, snapped outwards to
            bin boundaries.
        c_bar: Optional unit string; when it is a string a colour bar is
            drawn with labels formatted as ``"%.2f <unit>"``.

    Returns:
        The current matplotlib axes.
    """
    hop_len = window_size * (1 - overlap)
    display.specshow(stft, y_axis=y, sr=fs, hop_length=hop_len)

    # ``c_bar is str`` compared the value with the type object itself and was
    # never true for an actual string, so the colour bar was never drawn.
    if isinstance(c_bar, str):
        plt.colorbar(format="%.2f " + c_bar)

    if freq_subset:
        hz_per_bin = (fs / 2) / (1 + window_size / 2)
        locs, _labels = plt.yticks()
        # Snap the requested range outwards to whole bins.
        low_hz = hz_per_bin * math.floor(freq_subset[0] / hz_per_bin)
        high_hz = hz_per_bin * math.ceil(freq_subset[1] / hz_per_bin)
        new_labels = [
            "%.2f" % map_range(loc, locs[0], locs[-1], low_hz, high_hz)
            for loc in locs
        ]
        plt.yticks(locs, new_labels)

    return plt.gca()
def plot_band_pass_filter(data_path, bpf_kernel_size=33):
    """Show a waveform, its spectrogram and eight band-passed spectrograms.

    The audio is loaded at 16 kHz. A 2x5 grid holds the raw waveform (cell 1),
    the raw spectrogram (cell 6) and one spectrogram per 1 kHz band from
    0-1000 Hz up to 7000-8000 Hz in the remaining cells.

    Args:
        data_path: Path of the audio file to load.
        bpf_kernel_size: Kernel size of every ``BandPassFilter``; half of it
            (floored) is used as padding.
    """

    def db_spectrogram(samples):
        # dB-scaled STFT magnitude, referenced to the maximum.
        return librosa.amplitude_to_db(np.abs(librosa.stft(samples)),
                                       ref=np.max)

    plt.rcParams["font.size"] = axis_font_size
    y, sr = librosa.load(data_path, sr=16000)
    plt.figure(figsize=(24, 6))

    plt.subplot(2, 5, 1)
    display.waveplot(y, sr=sr, x_axis='none')
    plt.title("raw waveform")

    # raw spectrogram
    plt.subplot(2, 5, 6)
    display.specshow(db_spectrogram(y), sr=sr, y_axis='linear')
    plt.colorbar(format="%+2.0f dB")
    plt.title("Linear power spectrogram")

    band_starts = range(0, 8000, 1000)
    # Cells 1 and 6 are already taken by the unfiltered plots.
    grid_cells = (2, 3, 4, 5, 7, 8, 9, 10)
    half_kernel = bpf_kernel_size // 2
    for low_hz, plot_idx in zip(band_starts, grid_cells):
        high_hz = low_hz + 1000
        y_input = torch.tensor(y).unsqueeze(0).unsqueeze(0)
        bpf = BandPassFilter(kernel_size=bpf_kernel_size,
                             stride=1,
                             padding=half_kernel,
                             low_hz=low_hz,
                             high_hz=high_hz)
        y_pass = bpf.forward(y_input).squeeze().numpy()

        plt.subplot(2, 5, plot_idx)
        display.specshow(db_spectrogram(y_pass), sr=sr)
        plt.colorbar(format="%+2.0f dB")
        plt.title("frequency band:[{}, {}]".format(low_hz, high_hz))

    plt.tight_layout()
    plt.show()
def convert_wav_to_spectrogram_and_save_to_path(
        from_path=DIR_PATH_TO_OUT_WAV, to_path=DIR_PATH_TO_OUT_SPECTROGRAM):
    """Render every file in ``from_path`` as a mel-spectrogram PNG.

    Each file is loaded, converted to a dB-scaled mel spectrogram and drawn
    on a frameless, axis-free figure that is saved to ``to_path`` under the
    input's name with a ``.png`` extension.

    Args:
        from_path: Directory containing the audio files.
        to_path: Directory the PNG images are written to.
    """
    for wav_file_name in os.listdir(from_path):
        # os.path.join works whether or not the directories end in a
        # separator; plain concatenation silently built wrong paths.
        y, _sr = lsa.load(os.path.join(from_path, wav_file_name))
        mel_spectrogram = lsa.feature.melspectrogram(y=y)
        mel_spectrogram_to_db = lsa.power_to_db(mel_spectrogram, ref=np.max)

        fig = plt.figure(figsize=(SPECTROGRAM_WIDTH, SPECTROGRAM_HEIGHT),
                         dpi=1, frameon=False)
        try:
            # A single axes filling the whole figure, without decorations.
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            lsa_dly.specshow(mel_spectrogram_to_db)

            # Swap only the final extension, whatever its case.
            stem = os.path.splitext(wav_file_name)[0]
            fig.savefig(os.path.join(to_path, stem + '.png'),
                        bbox_inches='tight', pad_inches=0)
        finally:
            # Close even when loading or saving fails so figures do not
            # accumulate across files.
            plt.close(fig)
def show_sample(melsg, file_id=None, label="", offset=0, data_dir='data',
                load_clip=False):
    """Show a mel spectrogram and optionally an audio player for its clip.

    Args:
        melsg: Mel spectrogram; squeezed before plotting.
        file_id: Optional recording id. Used in the title as ``XC<id>`` and
            to locate ``<data_dir>/audio/XC<id>.mp3``.
        label: Optional label appended to the title.
        offset: Currently unused.
        data_dir: Root directory that contains the ``audio`` folder.
        load_clip: When true and ``file_id`` is set, load the clip and
            display an audio widget for it.
    """
    fig = plt.figure(figsize=(7, 5))
    if file_id or label != "":
        fig.suptitle(' '.join([("XC%s" % file_id) if file_id else "", label]))

    gs = GridSpec(4, 1, fig, hspace=.1, wspace=0, top=.93)
    melsg_ax = fig.add_subplot(gs[0:3])
    specshow(melsg.squeeze(), y_axis='mel', x_axis='s', ax=melsg_ax)
    plt.colorbar(melsg_ax.collections[0], ax=melsg_ax, pad=.01)
    plt.show()

    if file_id and load_clip:
        file_path = os.path.join(data_dir, 'audio', "XC%s.mp3" % file_id)
        print(file_path)

        import warnings

        # Silence loader warnings for this call only; a bare simplefilter()
        # would keep every warning in the process suppressed afterwards.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            data, samplerate = librosa.load(file_path)
        display(Audio(data, rate=samplerate))
def show_spec_info(filepath):
    """Compare a high- and a low-quality mel spectrogram of one file.

    Both spectrograms are computed through ``time_func(preprocess_input, …)``,
    their shapes and element counts are printed, and they are drawn one above
    the other in a 12x8 inch figure.

    Args:
        filepath: Path of the audio file to analyse.
    """
    # Settings of the low-quality variant; reused for its plot axes.
    lq_sample_rate = 12000
    lq_hop_len = 256

    print("Computing Spectrogram for '{}'".format(filepath))
    hq_db_spec = time_func(preprocess_input, filepath, False)
    lq_db_spec = time_func(preprocess_input, filepath, False,
                           sample_rate=lq_sample_rate, n_fft=512, n_mels=96,
                           hop_len=lq_hop_len)

    print("Shapes")
    print(" High Quality Shape : {} [{:d} elements]".format(
        hq_db_spec.shape, np.prod(hq_db_spec.shape)))
    # The element count now comes from the low-quality array; it used to
    # repeat the high-quality count.
    print(" Low Quality Shape : {} [{:d} elements]".format(
        lq_db_spec.shape, np.prod(lq_db_spec.shape)))

    fig = plt.figure(figsize=(12, 8))
    fig.suptitle("Spectrogram's for '{}'".format(
        os.path.basename(filepath)))

    plt.subplot(2, 1, 1)
    # NOTE(review): the defaults used by preprocess_input for the
    # high-quality variant are not visible here; pass its sr/hop too if they
    # differ from specshow's defaults.
    specshow(hq_db_spec, y_axis='mel', x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title("High Quality Mel Spectrogram")

    plt.subplot(2, 1, 2)
    # Scale the axes with the parameters the spectrogram was computed with.
    specshow(lq_db_spec, sr=lq_sample_rate, hop_length=lq_hop_len,
             y_axis='mel', x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title("Low Quality Mel Spectrogram")

    plt.tight_layout()
    print("")
def plt_mfcc(mfcc_features):
    """Render ``mfcc_features`` in a new 10x4 inch figure and show it.

    Args:
        mfcc_features: 2-D array handed to ``rosaplt.specshow`` with a time
            x-axis.
    """
    width_in, height_in = 10, 4
    plt.figure(figsize=(width_in, height_in))

    rosaplt.specshow(mfcc_features, x_axis='time')
    plt.colorbar()
    plt.title('MFCC')

    plt.tight_layout()
    plt.show()
def power_spectrogram(y=None, sr=22050, title="", bw=False, hide_axes=False):
    """
    Visualizable function

    Draw a power spectrogram (``np.abs(D) ** 2`` in dB) on the current axes.

    :param y: list, input data
    :param sr: int, sampling rate
    :param title: str, title of the plot
    :param bw: bool, black and white coloured
    :param hide_axes: bool, when true the title and colour bar are omitted
    :return: null
    """
    d = librosa.stft(y)
    # The input is already a power (squared magnitude), so power_to_db is the
    # matching conversion; amplitude_to_db would square it again and double
    # every dB value.
    power_db = librosa.power_to_db(np.abs(d) ** 2, ref=np.max)

    # sr is applied in both colour modes so the axes do not depend on ``bw``.
    plot_kwargs = {'sr': sr, 'y_axis': 'log', 'x_axis': 'time'}
    if bw:
        plot_kwargs['cmap'] = 'gray_r'
    display.specshow(power_db, **plot_kwargs)

    if not hide_axes:
        plt.title(title)
        plt.colorbar(format='%+2.0f dB')
    plt.tight_layout()
def two_d_fft_mag(self, feature_type='chroma_cqt', display=False):
    """
    Return the magnitude of the centred 2-D Fourier transform of a feature.

    The feature is ``self.audio_vector`` for ``'audio'`` or the result of the
    method of the same name for ``'hpcp'``, ``'chroma_cqt'``,
    ``'chroma_cens'`` and ``'crema'``. Usually fed by constant-Q transform or
    chroma features for cover detection tasks.

    Raises IOError for any other ``feature_type``. With ``display`` set, the
    coefficients are also drawn in a new 8x6 inch figure.
    """
    extractor_names = ('hpcp', 'chroma_cqt', 'chroma_cens', 'crema')

    if feature_type == 'audio':
        feature_vector = self.audio_vector
    elif feature_type in extractor_names:
        # Each remaining type maps to the method carrying the same name.
        feature_vector = getattr(self, feature_type)()
    else:
        raise IOError("two_d_fft_mag: Wrong parameter 'feature type'. "
                      "Should be in one of these ['audio', 'hpcp', 'chroma_cqt', 'chroma_cens', 'crema']")

    # 2d fourier transform, zero frequency shifted to the centre
    shifted_fft = np.fft.fftshift(np.fft.fft2(feature_vector))
    ndim_fft_mag = np.abs(shifted_fft)

    if display:
        import matplotlib.pyplot as plt
        from librosa.display import specshow

        plt.figure(figsize=(8, 6))
        plt.title('2D-Fourier transform magnitude coefficients')
        specshow(ndim_fft_mag, cmap='jet')

    return ndim_fft_mag
def visualize(self, channel=0, output='visualize.avi'):
    """Write a video with one constant-Q spectrogram frame per beat interval.

    For every pair of consecutive beats of ``channel`` the audio in between
    is transformed with ``self.cqt``, plotted, previewed in an OpenCV window
    and appended to the video at ``output``. The frame rate is the inverse
    of the first beat interval.

    Args:
        channel: Index of the audio channel to visualise.
        output: Path of the XVID-encoded video file to write.

    Raises:
        Exception: If no audio file has been loaded.
        AssertionError: If ``channel`` is not a valid channel index.
    """
    if not self.__opened:
        raise Exception('load an audio file first!')
    assert channel < self.__channels

    beats = self.get_tempo(just_beats=True)[channel]
    period = beats[1] - beats[0]
    fps = 1 / period
    n_segments = len(beats) - 1

    writer = None
    try:
        for i in range(n_segments):
            print('processing %d/%d' % (i, n_segments))
            start = int(beats[i] * self.__frame_rate)
            stop = int(beats[i + 1] * self.__frame_rate)
            segment = self.__data[start:stop, channel:channel + 1]
            # spectrum.shape = (channel number = 1, 88, hop number <= 2)
            spectrum, _freqs = self.cqt(segment)
            CQT = amplitude_to_db(spectrum[0], ref=np.max)

            fig = plt.figure(figsize=(12, 8))
            try:
                display.specshow(CQT, x_axis='time', y_axis='cqt_hz')
                plt.colorbar(format='%+2.0f dB')
                plt.title('Constant-Q power spectrogram (Hz)')
                fig.canvas.draw()
                width, height = fig.canvas.get_width_height()
                # frombuffer replaces the deprecated binary np.fromstring.
                # NOTE(review): tostring_rgb is presumably unavailable on
                # recent matplotlib releases - confirm against the pinned
                # version.
                rgb = np.frombuffer(fig.canvas.tostring_rgb(),
                                    dtype=np.uint8).reshape(height, width, 3)
            finally:
                # One figure per beat would otherwise pile up in pyplot.
                plt.close(fig)

            # OpenCV expects BGR channel order.
            frame = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
            cv2.imshow('', frame)
            # waitKey takes milliseconds; int(period) in seconds usually
            # rounded to 0, which blocks until a key is pressed.
            cv2.waitKey(max(1, int(period * 1000)))

            if writer is None:
                # VideoWriter wants (width, height), the opposite of the
                # (height, width) array shape.
                writer = cv2.VideoWriter(output,
                                         cv2.VideoWriter_fourcc(*'XVID'),
                                         fps, (width, height))
            writer.write(frame)
    finally:
        # No writer exists when there was no beat interval to render.
        if writer is not None:
            writer.release()
def test_different_gammas(filename, y_array=np.linspace(0, 1, 5)):
    """
    Separate a music sample into percussions (drums) and harmonics for
    different gamma values.

    Plot the spectrograms for percussions and harmonics for all gamma values,
    as well as the signal-to-noise ratio (SNR) for all gamma values. The
    separation itself is done by ``separate``, whose results are read back
    from 'output/H.wav' and 'output/P.wav'.

    @params:
        filename: string - the name of the audio file
        y_array: 1D numpy array - containing the gamma values to test
    """
    plt.figure(figsize=(12, 10))
    audioOG, _sr_og = lb.load(filename, sr=None)
    sum_squares_OG = np.sum(audioOG**2)

    # One subplot row per gamma instead of a fixed five rows, so any number
    # of gamma values can be plotted.
    n_rows = len(y_array)
    residual_h = []
    residual_p = []
    for row, gamma in enumerate(y_array):
        separate(filename, gamma)
        audioH, srH = lb.load('output/H.wav', sr=None)
        audioP, srP = lb.load('output/P.wav', sr=None)
        DH = lb.amplitude_to_db(np.abs(lb.stft(audioH)), ref=np.max)
        DP = lb.amplitude_to_db(np.abs(lb.stft(audioP)), ref=np.max)

        residual_h.append(np.sum((audioOG - audioH)**2))
        residual_p.append(np.sum((audioOG - audioP)**2))

        # The files are loaded at their native rate, so that rate has to be
        # used for the frequency axis.
        plt.subplot(n_rows, 2, 2 * row + 1)
        specshow(DH, sr=srH, y_axis='linear')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Harmonic power spectrogram with gamma = ' + str(gamma))

        plt.subplot(n_rows, 2, 2 * row + 2)
        specshow(DP, sr=srP, y_axis='linear')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Percussive power spectrogram with gamma = ' + str(gamma))

    plt.tight_layout()
    plt.show()

    signal_to_noise_OG_minus_H = 10 * np.log10(
        sum_squares_OG / np.array(residual_h, dtype=float))
    signal_to_noise_OG_minus_P = 10 * np.log10(
        sum_squares_OG / np.array(residual_p, dtype=float))

    plt.plot(y_array, signal_to_noise_OG_minus_H, label='Harmonic')
    plt.plot(y_array, signal_to_noise_OG_minus_P, label='Percussive')
    plt.xlabel('Gamma')
    plt.ylabel('SNR')
    plt.legend()
    plt.title('Signal-to-noise ratio of harmonic and percussive components from \nfile ' + filename + ' with different gammas')
    plt.show()
def plot_chroma_frequencies(self, outside_series=None, outside_sr=None):
    """
    Plot audio where the entire spectrum is projected on 12 bins
    representing the 12 semitones of the musical octave

    :param outside_series: optional series, resolved by ``self.select_series``
    :param outside_sr: optional sampling rate, resolved by ``self.select_sr``
    :return: None
    """
    y = self.select_series(outside_series)
    sr = self.select_sr(outside_sr)

    chroma_result = self.get_chroma_frequencies(y, sr)
    hop_length = chroma_result[0]
    chroma_gram = chroma_result[1]

    plt.figure(figsize=(14, 5))
    # The selected sampling rate is needed for a correct time axis; without
    # it specshow falls back to its own default rate.
    specshow(chroma_gram, sr=sr, x_axis='time', y_axis='chroma',
             hop_length=hop_length, cmap='coolwarm')
    plt.show()
def print_chromagram(chromagram):
    """Show ``chromagram`` in a new 20x5 inch figure.

    Args:
        chromagram: 2-D array drawn with a time x-axis, a chroma y-axis and
            the 'coolwarm' colormap.
    """
    render_options = {
        'x_axis': 'time',
        'y_axis': 'chroma',
        'cmap': 'coolwarm',
    }
    plt.figure(figsize=(20, 5))
    display.specshow(chromagram, **render_options)
    plt.show()
def display_feature(self):
    """Show the five stored feature matrices stacked in one 6x8 inch figure.

    From top to bottom: spectral centroid, spectral contrast, spectrogram,
    mel spectrogram and MFCC, each with its own colour bar and title.
    """
    # (attribute on self, panel title), in top-to-bottom order.
    panels = (
        ('sCentroid', 'Spectral Centroid'),
        ('sContrast', 'Spectral Contrast'),
        ('stft', 'Spectrogram'),
        ('mel_spectrogram', 'Mel_Spectrogram'),
        ('mfcc', 'MFCC'),
    )

    plt.figure(figsize=(6, 8))
    for position, (attribute, heading) in enumerate(panels, start=1):
        plt.subplot(5, 1, position)
        dp.specshow(getattr(self, attribute), sr=self.sr)
        plt.colorbar()
        plt.title(heading)
        plt.tight_layout()
    plt.show()
def forward(self, text, view=False, jupyter=True):
    """Synthesise ``text`` and return the waveform as a NumPy array.

    The text is converted to phones, the phones to a mel spectrogram and the
    spectrogram to a waveform, all without gradient tracking.

    Args:
        text: Input text.
        view: When true, plot the waveform above the mel spectrogram and
            show the figure.
        jupyter: When true, append 8000 zero samples to the waveform.

    Returns:
        1-D NumPy array with the synthesised waveform.
    """
    with torch.no_grad():
        phones = self.text2phone.string_to_tensor(text).squeeze(
            0).long().to(torch.device(self.device))
        mel = self.phone2mel(
            phones, speaker_embedding=self.speaker_embedding).transpose(0, 1)
        wave = self.mel2wav(mel.unsqueeze(0)).squeeze(0).squeeze(0)

    # Move to the CPU unconditionally: .numpy() below fails for tensors on
    # another device, which used to happen whenever ``jupyter`` was false.
    wave = wave.cpu()
    if jupyter:
        wave = torch.cat((wave, torch.zeros([8000])), 0)

    if view:
        import matplotlib.pyplot as plt
        import librosa.display as lbd

        fig, ax = plt.subplots(nrows=2, ncols=1)
        ax[0].plot(wave.numpy())
        lbd.specshow(mel.cpu().numpy(), ax=ax[1], sr=16000, cmap='GnBu',
                     y_axis='mel', x_axis='time', hop_length=256)
        ax[0].set_title(self.text2phone.get_phone_string(text))
        ax[0].yaxis.set_visible(False)
        ax[1].yaxis.set_visible(False)
        plt.subplots_adjust(left=0.05, bottom=0.1, right=0.95, top=.9,
                            wspace=0.0, hspace=0.0)
        plt.show()

    return wave.numpy()
for j in range(0, acts.shape[1]): if acts[i-1][j] > filter_threshold and acts[i-1][j] > acts[i][j]: acts[i-1][j] += acts[i][j] acts[i][j] = 0 acts[acts < filter_threshold] = 0 # visualisation matters import matplotlib.pyplot as plt from librosa.display import specshow import matplotlib.gridspec as gridspec plt.close('all') plt.subplot2grid((2, 2), (0, 0), colspan=2) specshow(V, sr=sr, hop_length=hop_length, n_yticks=25, x_axis='time', y_axis='linear') plt.colorbar() plt.title('Input power spectrogram') #plt.subplot2grid((2, 2), (0,1)) #specshow(W_zero, sr=sr, hop_length=hop_length, n_yticks=25, n_xticks=25, x_axis='frames', y_axis='linear') ##plt.colorbar() #plt.xlabel('Components') #plt.title('Initialised Components') plt.subplot2grid((2, 2), (1,0)) specshow(comps, sr=sr, hop_length=hop_length, n_yticks=25, n_xticks=25, x_axis='frames', y_axis='linear') #plt.colorbar() plt.xlabel('Components') plt.title('Learned Components')
comps = model.fit_transform(V, W=H_zero, H=W_zero) acts = model.components_ #from librosa.decompose import decompose #comps, acts = decompose(V, n_components=n_components, sort=True) # visualisation matters import matplotlib.pyplot as plt from librosa.display import specshow import matplotlib.gridspec as gridspec plt.close('all') plt.subplot2grid((4, 2), (0,0), colspan=2) specshow(midi_mat, sr=sr, x_axis='time', y_axis='cqt_note') plt.title('midi visualization') plt.subplot2grid((4, 2), (1,0), colspan=2) specshow(V.transpose(), sr=sr, x_axis='time', y_axis='cqt_note') #plt.ylabel('pitches') plt.colorbar(format='%+2.0f dB') plt.title('Input Constant-Q power spectrum') plt.subplot2grid((4, 2), (2,0)) specshow(comps, y_axis='cqt_note') #plt.ylabel('pitches') plt.xlabel('Index') plt.title('Learned Components') plt.subplot2grid((4, 2), (2,1))