Beispiel #1
0
def show_spectogram(data):
    """Draw ``data`` with ``display.specshow`` and open the plot window.

    The image is placed in the first cell of a one-row, two-column subplot
    grid, titled, and then shown with ``plt.show()``.

    :param data: array handed unchanged to ``display.specshow``
    """
    from matplotlib import pyplot as plt

    # First cell of a 1x2 layout becomes the current axes for specshow.
    plt.subplot(1, 2, 1)
    display.specshow(data)
    plt.gca().set_title('log-Spectrogram by Librosa')
    plt.show()
def plot_spec(signal, sr=16000, win_length=None, hop_length=None, return_spec=False,
              n_fft=4096):
    """Plot the dB-scaled STFT magnitude of ``signal`` on the current axes.

    :param signal: audio samples passed to ``librosa.core.stft``
    :param sr: sampling rate in Hz; used for the default window/hop sizes and
        for axis scaling
    :param win_length: window length in samples; defaults to ``int(sr * 0.025)``
    :param hop_length: hop in samples; defaults to ``int(sr * 0.010)``
    :param return_spec: when true, the dB spectrogram is returned
    :param n_fft: FFT size handed to the STFT (4096 was previously hard-coded)
    :return: the dB spectrogram when ``return_spec`` is true, otherwise ``None``
    """
    if win_length is None:
        win_length = int(sr * 0.025)

    if hop_length is None:
        hop_length = int(sr * 0.010)

    Sxx = librosa.core.stft(
        signal,
        win_length=win_length,
        hop_length=hop_length,
        n_fft=n_fft
    )

    # Reference is the peak magnitude, so the loudest bin sits at 0 dB.
    spec = librosa.amplitude_to_db(np.abs(Sxx), ref=np.max)
    specshow(
        spec,
        sr=sr,
        # Use the same hop as the STFT; otherwise the time axis is scaled
        # from specshow's own default hop instead of the real frame spacing.
        hop_length=hop_length,
        x_axis='time',
        y_axis='hz',
        cmap='gray_r'
    )
    plt.colorbar(format='%+2.0f dB')

    if return_spec:
        return spec
def plot_mfcc(mfccs):
    """Show ``mfccs`` as an image with a time axis, a colorbar and the title 'MFCC'.

    A new 10x4 inch figure is opened for the plot and ``plt.show()`` is
    called at the end.

    :param mfccs: array handed unchanged to ``display.specshow``
    """
    plt.figure(figsize=(10, 4))
    display.specshow(mfccs, x_axis='time')
    plt.colorbar()
    plt.gca().set_title('MFCC')
    plt.tight_layout()
    plt.show()
Beispiel #4
0
def plot_chroma(chromagram,
                name="untitled",
                sr=44100,
                hl=2048,
                output_dir="/Users/angel/Dropbox/Apps/Texpad/Thesis/figures",
                cmap='Reds',
                save=False):
    """Plot a 12- or 36-bin chromagram with pitch-class tick labels.

    :param chromagram: 2-D array whose first dimension is 12 or 36
    :param name: file stem used when saving the figure
    :param sr: sampling rate forwarded to ``specshow``
    :param hl: hop length forwarded to ``specshow``
    :param output_dir: directory the PDF is written to when ``save`` is true
    :param cmap: colormap name forwarded to ``specshow``
    :param save: when true, the figure is written to ``output_dir/name.pdf``
    :return: ``None``; inputs with any other number of rows are ignored, as
        before

    Previously all drawing code was nested under the 36-bin branch, so 12-bin
    input produced an empty figure, and the 36-bin ticks were then overwritten
    with the 12-bin tick set.
    """
    n_bins = chromagram.shape[0]

    if n_bins == 12:
        figsize = (5.16, 2)
        tick_positions = (0.5, 2.5, 4.5, 5.5, 7.5, 9.5, 11.5)
        tick_labels = ('c', 'd', 'e', 'f', 'g', 'a', 'b')
    elif n_bins == 36:
        figsize = (5.16, 2.5)
        tick_positions = (0.5, 3.5, 6.5, 9.5, 12.5, 15.5, 18.5, 21.5, 24.5,
                          27.5, 30.5, 33.5)
        tick_labels = ('c', r'c$\sharp$', 'd', r'e$\flat$', 'e', 'f',
                       r'f$\sharp$', 'g', r'a$\flat$', 'a', r'b$\flat$', 'b')
    else:
        # Unsupported bin count: keep the historical no-op behaviour.
        return

    plt.figure(figsize=figsize, dpi=150)
    specshow(chromagram, x_axis='time', sr=sr, hop_length=hl, cmap=cmap)
    plt.xlabel('time (secs.)')
    plt.ylabel('chroma')
    # Ticks are applied after specshow so the plotting call cannot reset them.
    plt.yticks(tick_positions, tick_labels)
    plt.tight_layout()
    if save:
        plt.savefig(os.path.join(output_dir, name + '.pdf'),
                    format=FORMAT,
                    dpi=PRINT_QUALITY,
                    transparent=TRANSPARENT)
    plt.show()
Beispiel #5
0
def plot_speclike(  # pylint: disable=too-many-arguments
        orderedlist,
        figsize=(20, 4),
        show_time=False,
        sr=16000,
        hop_sec=0.05,
        cmap='viridis',
        show=True):
    """Stack the arrays in ``orderedlist`` vertically and draw them like a spectrogram.

    The list is reversed before stacking, so its first item becomes the last
    row block handed to ``specshow``.

    :param orderedlist: sequence of arrays sharing the same first dimension
    :param figsize: size of the new figure in inches
    :param show_time: when true, the x axis is labelled in time units
    :param sr: sampling rate forwarded to ``specshow``
    :param hop_sec: hop between columns in seconds; converted to samples
    :param cmap: colormap name forwarded to ``specshow``
    :param show: when true, ``plt.show()`` is called at the end
    """
    assert all(
        o.shape[0] == orderedlist[0].shape[0]
        for o in orderedlist), "All list items should be of the same length"

    x_axis = 'time' if show_time else None
    hop_len = int(hop_sec * sr)

    plt.figure(figsize=figsize)
    specshow(
        # np.vstack needs a real sequence; a bare ``reversed`` iterator is
        # rejected by newer NumPy releases.
        np.vstack(list(reversed(orderedlist))),
        x_axis=x_axis,
        sr=sr,
        hop_length=hop_len,
        cmap=cmap,
    )
    plt.colorbar()

    if show:
        plt.show()
Beispiel #6
0
def display_melspectrogram(y, sr,title):
    """Plot the mel spectrogram of ``y`` in dB on a new 10x4 inch figure.

    :param y: audio handed to ``audio_to_mel``
    :param sr: sampling rate handed to ``audio_to_mel``
    :param title: text placed above the plot
    """
    mel_power = audio_to_mel(y, sr)
    plt.figure(figsize=(10, 4))
    # Convert to dB relative to the maximum of the mel spectrogram.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    display.specshow(mel_db, y_axis='mel', fmax=8000, x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.gca().set_title(title)
    plt.tight_layout()
Beispiel #7
0
    def visualize_cqts(self,
                       filename,
                       start_width,
                       perm_map,
                       display=True,
                       figsize=(10, 5)):
        """Plot a window of a stored scalogram next to its jigsaw-permuted version.

        The array is loaded from ``self.root_dir + filename``, a window of
        ``self.num_seconds * int(1280 / 30)`` columns starting at
        ``start_width`` is cut out, split into three equal pieces along the
        column axis, and the pieces are re-joined in ``perm_map`` order with
        the last five columns of each piece dropped.

        :param filename: file name appended to ``self.root_dir``
        :param start_width: first column of the window
        :param perm_map: iterable of piece indices (0-2) giving the new order
        :param display: when false, nothing is drawn and no figure is created
        :param figsize: size of the two-panel figure in inches
        :return: ``None``
        """
        logscalogram = np.load(self.root_dir + filename)
        second_width = int(1280 / 30)
        desired_width = self.num_seconds * second_width
        chosen_split = logscalogram[:, start_width:start_width + desired_width]
        height = logscalogram.shape[0]

        jigsaw_splits = np.split(chosen_split, 3, axis=1)
        final_jigsaw = np.concatenate(
            [jigsaw_splits[x][:, :-5] for x in perm_map], axis=1)

        if not display:
            # Previously a figure was allocated here too and never released.
            return

        fig, ax = plt.subplots(1,
                               2,
                               sharex='col',
                               sharey='row',
                               figsize=figsize)
        ax[0].set_title('ORIGINAL SPECGRAM')
        specshow(chosen_split, ax=ax[0])

        ax[1].set_title('FULL JIGSAW SPECGRAM')
        jigsaw_xs = (desired_width * np.array([1, 2, 3]) / 3).astype('int')
        for jigsaw_x in jigsaw_xs:
            # Draw on the jigsaw panel explicitly rather than relying on
            # whichever axes pyplot currently treats as active.
            ax[1].plot([jigsaw_x, jigsaw_x], [0, height],
                       '--',
                       color='w',
                       linewidth=2.0)
        specshow(final_jigsaw, ax=ax[1])
        plt.show()
        return
def plot_db(path):
    """Save a log-frequency dB spectrogram of the audio at ``path``.

    The audio is loaded at ``hp.rate`` and the picture is written next to the
    input as ``path + '_fig.png'``.

    :param path: location of the audio file; also the prefix of the output
    """
    samples, _ = librosa.load(path, sr=hp.rate)
    magnitude = np.abs(librosa.stft(samples))
    db_spec = librosa.amplitude_to_db(magnitude, ref=np.max)

    plt.figure(figsize=(12, 8))
    display.specshow(db_spec, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')

    target = path + '_fig.png'
    plt.savefig(target)
Beispiel #9
0
def audio_processor(item):
    """Render a dark-themed spectrogram thumbnail for ``item`` as an in-memory PNG.

    The figure size is derived from the clip duration and the Nyquist
    frequency via ``INCHES_PER_SECOND`` and ``INCHES_PER_KHZ``.

    :param item: object whose ``item_file`` is loadable by ``librosa.core.load``
    :return: ``InMemoryUploadedFile`` named ``thumbnail.png`` holding the PNG
    """
    sig, sr = librosa.core.load(item.item_file)

    duration = len(sig) / sr
    nyquist = sr / 2
    # int() truncates, so short clips would otherwise ask for a zero-sized
    # figure, which matplotlib refuses to create.
    figsize = (
        max(1, int(duration * INCHES_PER_SECOND)),
        max(1, int((nyquist / 1000) * INCHES_PER_KHZ)))

    spec = np.abs(librosa.core.stft(sig))
    scaled_spec = librosa.amplitude_to_db(spec)

    tmp_io = io.BytesIO()
    with plt.style.context('dark_background'):
        fig = plt.figure(figsize=figsize)
        try:
            specshow(scaled_spec, sr=sr, cmap='gray', y_axis='linear', x_axis='time')
            plt.colorbar(format='%+2.0f dB', aspect=40)
            plt.tight_layout()
            plt.savefig(tmp_io, format='png', facecolor='black', bbox_inches='tight')
        finally:
            # Release the figure so repeated calls do not accumulate figures.
            plt.close(fig)

    # Rewind so a plain read() on the wrapped file yields the PNG bytes.
    tmp_io.seek(0)
    im_file = InMemoryUploadedFile(
        tmp_io, None,
        'thumbnail.png',
        'image/png',
        tmp_io.getbuffer().nbytes,
        None)
    return im_file
Beispiel #10
0
def readFile(filepath):
    """Load audio, plot its normalised STFT magnitude and return it at a fixed width.

    The magnitude is normalised with ``librosa.util.normalize``, drawn with
    ``display.specshow`` and then zero-padded on the right (or truncated) to
    exactly 315 columns.

    :param filepath: audio file readable by ``librosa.load``
    :return: 2-D array with 315 columns
    """
    target_frames = 315

    y, _ = librosa.load(filepath)
    # np.abs on the complex STFT is the magnitude sqrt(re**2 + im**2).
    D_energy = np.abs(librosa.stft(y))

    norm = librosa.util.normalize(D_energy)
    display.specshow(norm, y_axis='log', x_axis='time')

    # Longer inputs used to produce a negative pad width and crash; cut them
    # to the target width so the result shape is always the same.
    norm = norm[:, :target_frames]
    missing = target_frames - norm.shape[1]
    result = np.pad(norm, [(0, 0), (0, missing)], 'constant')
    return result
Beispiel #11
0
def plot_spectrum_pred(rec_spec, true_spec, path, sr, hop_length):
    """Save a side-by-side dB plot of a reconstructed and a reference spectrum.

    Both inputs have their first two axes swapped and are squeezed before
    being converted to dB (relative to their own maximum) and drawn on two
    axes that share x and y. The figure is written to ``path`` and closed.

    :param rec_spec: reconstructed spectrum
    :param true_spec: ground-truth spectrum
    :param path: destination passed to ``plt.savefig``
    :param sr: sampling rate forwarded to ``specshow``
    :param hop_length: hop; ``specshow`` receives ``hop_length * 0.75``
    """
    rec_spec = tf.squeeze(np.swapaxes(rec_spec, 0, 1))
    true_spec = tf.squeeze(np.swapaxes(true_spec, 0, 1))

    _, (left_ax, right_ax) = plt.subplots(ncols=2, sharex=True, sharey=True)
    plt.xlabel("Frequency")
    plt.ylabel("Time in sample")

    panels = (
        (left_ax, "reconstruction", rec_spec),
        (right_ax, "ground truth", true_spec),
    )
    for axis, heading, spectrum in panels:
        axis.set_title(heading)
        specshow(librosa.amplitude_to_db(spectrum, ref=np.max),
                 ax=axis,
                 sr=sr,
                 hop_length=hop_length * 0.75,
                 y_axis='linear',
                 x_axis='s')

    plt.savefig(path)
    plt.close()
def plot_fourier_transformation(sfreq: int, audio: np.ndarray, spec_db: np.ndarray) -> None:
    """Show the raw waveform and its spectrogram in a two-row figure.

    The waveform is plotted on the upper axes against ``sample index / sfreq``;
    the spectrogram is then drawn with ``specshow`` using a hop of 16 samples.

    :param sfreq: sampling frequency of ``audio``
    :param audio: raw audio samples
    :param spec_db: spectrogram to display
    """
    timestamps = np.arange(len(audio)) / sfreq
    _, (wave_ax, _unused_ax) = plt.subplots(2, 1, figsize=(10, 10), sharex=True)
    wave_ax.plot(timestamps, audio)
    # No axes is passed here, so specshow draws on pyplot's current axes.
    specshow(spec_db, sr=sfreq, x_axis='time', y_axis='hz', hop_length=16)
    plt.show()
Beispiel #13
0
def createMelSpectrogram(input_path, fileName, output_path, saveOrShow=0):
    """Compute a mel spectrogram for one audio file and save it as PNG or show it.

    At most the first 10 seconds are loaded, resampled to 16 kHz. The
    spectrogram comes from the ``Melspectrogram`` layer configured below and
    its shape is printed.

    :param input_path: directory containing ``fileName``
    :param fileName: audio file name
    :param output_path: directory the PNG is written to
    :param saveOrShow: ``0`` saves ``<stem>.png`` with the 'inferno' colormap;
        any other value displays the spectrogram instead
    """
    # load sound signal
    signal, sr = librosa.load(os.path.join(input_path, fileName), duration=10, sr=16000)

    # create Mel Spectrogram
    S = Melspectrogram(n_dft=1024,
                       n_hop=320,
                       input_shape=(1, signal.shape[0]),
                       padding='same', sr=sr, n_mels=224, fmin=1400, fmax=sr/2,
                       power_melgram=2.0, return_decibel_melgram=True,
                       trainable_fb=False, trainable_kernel=False)(signal.reshape(1, 1, -1)).numpy()

    # Drop the leading and trailing axes, keeping dimensions 1 and 2.
    S = S.reshape(S.shape[1], S.shape[2])

    print(S.shape)

    if saveOrShow == 0:
        # splitext strips only the final extension, so dotted names such as
        # "take.01.wav" keep their full stem instead of collapsing to "take".
        stem = os.path.splitext(fileName)[0]
        matplotlib.image.imsave(os.path.join(output_path, stem + ".png"), S, cmap='inferno')
    else:
        display.specshow(S, sr=sr)
        plt.show()
Beispiel #14
0
    def visualize_seam(self, trans, seam):
        """
        Returns a visualization of a computed seam given the transition audio and seam matrix. The stft of the audio
        must be the same dimensions as the seam.

        In every seam row the first truthy cell is located and a band around
        it is set to 0 dB in the spectrogram so it stands out in the plot.
        Rows without any truthy cell are left untouched.

        :param trans: transition audio handed to ``self.stft``
        :param seam: 2-D truthy/falsy matrix marking the seam
        :return: ``io.BytesIO`` holding the figure as SVG, rewound to the start
        """
        yft = lr.amplitude_to_db(np.abs(self.stft(trans)), ref=np.max)
        # Keep at least one cell on each side so narrow spectrograms still
        # get a visible seam (rounding could previously yield zero width).
        line_width = max(1, round(yft.shape[1] / 150))

        for ri, row in enumerate(seam):
            ci = next((i for i, v in enumerate(row) if v), None)
            if ci is None:
                # No seam cell in this row; nothing to highlight.
                continue
            start = max(ci - line_width, 0)
            stop = min(ci + line_width, len(row))
            yft[ri, start:stop] = 0

        output = io.BytesIO()
        fig = plt.figure(figsize=(10, 4))
        try:
            display.specshow(yft,
                             y_axis='log',
                             x_axis='time',
                             hop_length=self.parameters['n_fft'] / 8)
            plt.savefig(output, format='svg')
        finally:
            # The figure is only needed to produce the SVG; release it.
            plt.close(fig)

        output.seek(0)
        return output
Beispiel #15
0
def test(sample, fs, low=10000, high=25000, order=9):
    """Band-pass ``sample`` and show the dB spectrogram of the result.

    :param sample: input signal handed to ``band_pass``
    :param fs: sampling rate, forwarded to ``band_pass`` and ``specshow``
    :param low: lower cut-off forwarded to ``band_pass``
    :param high: upper cut-off forwarded to ``band_pass``
    :param order: filter order forwarded to ``band_pass``
    """
    filtered = band_pass(sample, fs=fs, low=low, high=high, order=order)
    magnitude = np.abs(librosa.stft(filtered))
    db_spec = librosa.amplitude_to_db(magnitude, ref=np.max)
    dis.specshow(db_spec, y_axis='linear', sr=fs, x_axis='s')
    plt.colorbar(format='%+2.0f dB')
    # NOTE(review): the title says log-frequency while y_axis is 'linear'.
    plt.gca().set_title('Log-frequency power spectrogram')
    plt.show()
Beispiel #16
0
 def plotSpec(self, y, sr):
     """Draw a dB spectrogram of ``y`` with a log frequency axis and a time axis.

     Progress messages are printed before and after drawing.

     :param y: audio samples handed to ``librosa.stft``
     :param sr: sampling rate; also used as the FFT size (``n_fft=sr``)
     :return: ``None``
     """
     print('Iscrtavanje spektograma snage', end="")
     # A spectrogram is a visual representation of the frequency spectrum of
     # a sound or other signal as it varies with time or another variable.
     yD = librosa.stft(y, n_fft=sr)  # returns a matrix of complex values
     # Take the magnitude explicitly: amplitude_to_db is meant for real
     # amplitudes, and complex input would have its phase discarded anyway.
     disp.specshow(librosa.amplitude_to_db(abs(yD)), y_axis='log', x_axis='time')
     print('Završeno.')
     return
Beispiel #17
0
def plot_mels(specs,fs=16000):
    """Draw ``specs`` on mel/time axes with a dB colorbar and a title.

    :param specs: spectrogram handed unchanged to ``specshow``
    :param fs: sampling rate; the displayed range is capped at ``fs / 2``
    """
    upper_freq = fs / 2
    specshow(specs, sr=fs, fmax=upper_freq, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.gca().set_title('Mel-frequency spectrogram')
    plt.tight_layout()
Beispiel #18
0
def spectrogram(stft,
                window_size,
                overlap,
                fs,
                y='linear',
                freq_subset: tuple = None,
                c_bar=None):
    """Draw ``stft`` with ``display.specshow`` and return the current axes.

    :param stft: 2-D array to display
    :param window_size: analysis window length in samples
    :param overlap: fractional overlap between windows (e.g. ``0.75``)
    :param fs: sampling rate forwarded to ``specshow``
    :param y: y-axis type forwarded to ``specshow``
    :param freq_subset: optional ``(low, high)`` pair in Hz; when given, the
        y tick labels are remapped onto that range, snapped outward to bin
        edges
    :param c_bar: optional unit string; when it is a string, a colorbar
        labelled ``"<value> <c_bar>"`` is added
    :return: the current matplotlib axes
    """
    # Hops are whole sample counts; the product can be a float like 256.0.
    hop_len = int(round(window_size * (1 - overlap)))

    display.specshow(stft, y_axis=y, sr=fs, hop_length=hop_len)

    # ``c_bar is str`` compared against the type object itself and was never
    # true for an actual string, so the colorbar was never drawn.
    if isinstance(c_bar, str):
        # Escape '%' so a unit such as "%" does not break the %-format string.
        plt.colorbar(format="%.2f " + c_bar.replace("%", "%%"))

    if freq_subset:
        hz_per_bin = (fs / 2) / (1 + window_size / 2)
        locs, labels = plt.yticks()
        c = hz_per_bin * math.floor(freq_subset[0] / hz_per_bin)
        d = hz_per_bin * math.ceil(freq_subset[1] / hz_per_bin)
        new_labels = [
            "%.2f" % map_range(loc, locs[0], locs[-1], c, d)
            for loc in locs
        ]
        plt.yticks(locs, new_labels)

    return plt.gca()
def plot_band_pass_filter(data_path, bpf_kernel_size=33):
    """Plot a waveform, its spectrogram and eight 1 kHz band-passed spectrograms.

    The audio is loaded at 16 kHz and laid out on a 2x5 grid: the raw
    waveform (cell 1), the full spectrogram (cell 6) and one spectrogram per
    band ``[k, k + 1000]`` Hz for ``k`` in 0..7000 in the remaining cells.

    :param data_path: audio file readable by ``librosa.load``
    :param bpf_kernel_size: kernel size of each ``BandPassFilter``; padding is
        ``bpf_kernel_size // 2``
    """
    plt.rcParams["font.size"] = axis_font_size
    y, sr = librosa.load(data_path, sr=16000)
    plt.figure(figsize=(24, 6))
    plt.subplot(2, 5, 1)
    # NOTE(review): confirm `display.waveplot` exists in the installed librosa.
    display.waveplot(y, sr=sr, x_axis='none')
    plt.title("raw waveform")
    # raw spectrogram
    plt.subplot(2, 5, 6)
    display.specshow(librosa.amplitude_to_db(np.abs(librosa.stft(y)),
                                             ref=np.max),
                     sr=sr,
                     y_axis='linear')
    plt.colorbar(format="%+2.0f dB")
    plt.title("Linear power spectrogram")

    # The filter input is the same for every band, so build it once.
    y_input = torch.tensor(y).unsqueeze(0).unsqueeze(0)

    for low_hz, plot_idx in zip([0, 1000, 2000, 3000, 4000, 5000, 6000, 7000],
                                [2, 3, 4, 5, 7, 8, 9, 10]):
        high_hz = low_hz + 1000
        bpf = BandPassFilter(kernel_size=bpf_kernel_size,
                             stride=1,
                             padding=bpf_kernel_size // 2,
                             low_hz=low_hz,
                             high_hz=high_hz)
        # Only the output values are needed; without autograd tracking the
        # result can always be converted to NumPy.
        with torch.no_grad():
            y_pass = bpf.forward(y_input).squeeze().numpy()
        plt.subplot(2, 5, plot_idx)

        display.specshow(librosa.amplitude_to_db(np.abs(librosa.stft(y_pass)),
                                                 ref=np.max),
                         sr=sr)
        plt.colorbar(format="%+2.0f dB")
        plt.title("frequency band:[{}, {}]".format(low_hz, high_hz))
    plt.tight_layout()
    plt.show()
def convert_wav_to_spectrogram_and_save_to_path(
        from_path=DIR_PATH_TO_OUT_WAV, to_path=DIR_PATH_TO_OUT_SPECTROGRAM):
    """Write a borderless mel-spectrogram PNG for every file in ``from_path``.

    Each file is loaded, converted to a dB mel spectrogram (relative to its
    maximum) and drawn on an axes that fills the whole figure with the axis
    decorations switched off. The image is stored in ``to_path`` under the
    input's name with its extension replaced by ``.png``.

    :param from_path: directory holding the input audio files
    :param to_path: directory the PNG files are written to
    """
    for wav_file_name in os.listdir(from_path):
        # os.path.join works whether or not the directory ends in a separator.
        y, sr = lsa.load(os.path.join(from_path, wav_file_name))
        mel_spectrogram = lsa.feature.melspectrogram(y=y, sr=sr)
        mel_spectrogram_to_db = lsa.power_to_db(mel_spectrogram, ref=np.max)
        fig = plt.figure(figsize=(SPECTROGRAM_WIDTH, SPECTROGRAM_HEIGHT),
                         dpi=1,
                         frameon=False)
        try:
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)

            lsa_dly.specshow(mel_spectrogram_to_db)

            # Swap only the final extension; str.replace would also rewrite
            # a '.wav' occurring earlier in the name.
            stem = os.path.splitext(wav_file_name)[0]
            fig.savefig(os.path.join(to_path, stem + '.png'),
                        bbox_inches='tight',
                        pad_inches=0)
        finally:
            # Close even on failure so a bad file does not leak a figure.
            plt.close(fig)
Beispiel #21
0
def show_sample(melsg,
                file_id=None,
                label="",
                offset=0,
                data_dir='data',
                load_clip=False):
    """Show a mel spectrogram and optionally an audio player for its recording.

    :param melsg: spectrogram; squeezed before plotting
    :param file_id: recording id; shown as ``XC<id>`` in the title and used to
        locate ``<data_dir>/audio/XC<id>.mp3``
    :param label: text appended to the figure title
    :param offset: currently unused
    :param data_dir: root directory containing the ``audio`` folder
    :param load_clip: when true and ``file_id`` is set, the clip is loaded and
        an ``Audio`` widget is displayed
    """
    fig = plt.figure(figsize=(7, 5))
    if file_id or label != "":
        fig.suptitle(' '.join([("XC%s" % file_id) if file_id else "", label]))
    gs = GridSpec(4, 1, fig, hspace=.1, wspace=0, top=.93)
    melsg_ax = fig.add_subplot(gs[0:3])
    specshow(melsg.squeeze(), y_axis='mel', x_axis='s', ax=melsg_ax)
    plt.colorbar(melsg_ax.collections[0], ax=melsg_ax, pad=.01)
    plt.show()
    if file_id and load_clip:
        file_path = os.path.join(data_dir, 'audio', "XC%s.mp3" % file_id)
        print(file_path)
        import warnings
        # Silence warnings only while loading and rendering the clip; a bare
        # simplefilter('ignore') would mute warnings for the whole process.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            data, samplerate = librosa.load(file_path)
            display(Audio(data, rate=samplerate))
    def show_spec_info(filepath):
        """Compare a default and a reduced-size spectrogram of one file.

        Both spectrograms are produced by ``preprocess_input`` through
        ``time_func``; the second call uses ``sample_rate=12000``,
        ``n_fft=512``, ``n_mels=96`` and ``hop_len=256``. Their shapes and
        element counts are printed and both are drawn one above the other.

        :param filepath: audio file handed to ``preprocess_input``
        """
        print("Computing Spectrogram for '{}'".format(filepath))

        hq_db_spec = time_func(preprocess_input, filepath, False)
        lq_db_spec = time_func(preprocess_input,
                               filepath,
                               False,
                               sample_rate=12000,
                               n_fft=512,
                               n_mels=96,
                               hop_len=256)

        print("Shapes")
        print("  High Quality Shape : {}  [{:d} elements]".format(
            hq_db_spec.shape, np.prod(hq_db_spec.shape)))
        # The element count must come from the low-quality array; it used to
        # be taken from the high-quality one by mistake.
        print("  Low Quality Shape  : {}  [{:d} elements]".format(
            lq_db_spec.shape, np.prod(lq_db_spec.shape)))

        fig = plt.figure(figsize=(12, 8))
        fig.suptitle("Spectrogram's for '{}'".format(
            os.path.basename(filepath)))
        plt.subplot(2, 1, 1)
        specshow(hq_db_spec, y_axis='mel', x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.title("High Quality Mel Spectrogram")

        plt.subplot(2, 1, 2)
        specshow(lq_db_spec, y_axis='mel', x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.title("Low Quality Mel Spectrogram")

        plt.tight_layout()
        print("")
Beispiel #23
0
def plt_mfcc(mfcc_features):
    """Open a 10x4 inch figure showing ``mfcc_features`` over a time axis.

    A colorbar and the title 'MFCC' are added before the plot is shown.

    :param mfcc_features: array handed unchanged to ``rosaplt.specshow``
    """
    figure_size = (10, 4)
    plt.figure(figsize=figure_size)
    rosaplt.specshow(mfcc_features, x_axis='time')
    plt.colorbar()
    plt.gca().set_title('MFCC')
    plt.tight_layout()
    plt.show()
Beispiel #24
0
def power_spectrogram(y=None, sr=22050, title="", bw=False, hide_axes=False):
    """
    Visualizable function \n
    Create a power spectrogram using np.abs(D) ** 2

    The squared STFT magnitude is converted to dB relative to its maximum
    and drawn with a log frequency axis and a time axis.

        :param y: list, input data
        :param sr: int, sampling rate
        :param title: str, title of the plot
        :param bw: bool, black and white coloured
        :param hide_axes: bool, hide title and axes
        :return: null
    """
    power = np.abs(librosa.stft(y)) ** 2
    # Squared magnitudes are power values, so power_to_db is the matching
    # conversion; amplitude_to_db on them would double every dB value.
    power_db = librosa.power_to_db(power, ref=np.max)

    # The sampling rate now applies in both colour modes; it used to be
    # passed only for the black-and-white plot.
    spec_kwargs = {'sr': sr, 'y_axis': 'log', 'x_axis': 'time'}
    if bw:
        spec_kwargs['cmap'] = 'gray_r'
    display.specshow(power_db, **spec_kwargs)

    if not hide_axes:
        plt.title(title)
        plt.colorbar(format='%+2.0f dB')
        plt.tight_layout()
Beispiel #25
0
    def two_d_fft_mag(self, feature_type='chroma_cqt', display=False):
        """
        Return the magnitude of the centred 2-D Fourier transform of a feature matrix.

        ``feature_type`` selects the input: 'audio' reads ``self.audio_vector``;
        'hpcp', 'chroma_cqt', 'chroma_cens' and 'crema' call the method of the
        same name on ``self``. Any other value raises ``IOError``.

        When ``display`` is true the magnitudes are also drawn with
        ``librosa.display.specshow`` on a new 8x6 inch figure.
        """
        extractor_names = ('hpcp', 'chroma_cqt', 'chroma_cens', 'crema')

        if feature_type == 'audio':
            feature_vector = self.audio_vector
        elif feature_type in extractor_names:
            # The extractor method carries the same name as the feature type.
            feature_vector = getattr(self, feature_type)()
        else:
            raise IOError("two_d_fft_mag: Wrong parameter 'feature type'. "
                          "Should be in one of these ['audio', 'hpcp', 'chroma_cqt', 'chroma_cens', 'crema']")

        # 2-D transform, zero frequency shifted to the centre, then magnitude
        spectrum_2d = np.fft.fft2(feature_vector)
        magnitudes = np.abs(np.fft.fftshift(spectrum_2d))

        if not display:
            return magnitudes

        import matplotlib.pyplot as plt
        from librosa.display import specshow
        plt.figure(figsize=(8,6))
        plt.title('2D-Fourier transform magnitude coefficients')
        specshow(magnitudes, cmap='jet')
        return magnitudes
Beispiel #26
0
 def visualize(self, channel = 0, output = 'visualize.avi'):
   """Render one constant-Q spectrogram frame per beat interval into a video.

   For each pair of consecutive beats of ``channel`` the matching slice of
   the audio is transformed with ``self.cqt``, plotted in dB, shown in an
   OpenCV preview window and appended to ``output`` (XVID codec). The frame
   rate is the inverse of the first beat interval.

   :param channel: index of the audio channel to visualise
   :param output: path of the video file to write
   :raises Exception: when no audio file has been loaded
   """
   if self.__opened == False:
     raise Exception('load an audio file first!')
   assert channel < self.__channels
   beats = self.get_tempo(just_beats = True)[channel]
   period = beats[1] - beats[0]
   fps = 1 / period
   # cv2.waitKey expects milliseconds and treats 0 as "wait forever", which
   # is what a sub-second period truncated with int() used to produce.
   preview_delay_ms = max(1, int(period * 1000))
   writer = None
   for i in range(len(beats) - 1):
     print('processing %d/%d' % (i, len(beats)-1))
     segment = self.__data[int(beats[i] * self.__frame_rate):int(beats[i+1] * self.__frame_rate), channel:channel+1]
     spectrum, _ = self.cqt(segment) # spectrum.shape = (channel number = 1, 88, hop number <= 2)
     CQT = amplitude_to_db(spectrum[0], ref = np.max)
     fig = plt.figure(figsize = (12,8))
     try:
       display.specshow(CQT, x_axis = 'time', y_axis = 'cqt_hz')
       plt.colorbar(format = '%+2.0f dB')
       plt.title('Constant-Q power spectrogram (Hz)')
       fig.canvas.draw()
       rgba = np.asarray(fig.canvas.buffer_rgba())
       # OpenCV works with BGR channel order: drop alpha and reverse RGB.
       # The copy also detaches the frame from the canvas buffer.
       image = np.ascontiguousarray(rgba[:, :, 2::-1])
     finally:
       # One figure per beat would otherwise pile up in memory.
       plt.close(fig)
     cv2.imshow('', image)
     cv2.waitKey(preview_delay_ms)
     if writer is None:
       height, width = image.shape[:2]
       # VideoWriter takes the frame size as (width, height).
       writer = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*'XVID'), fps, (width, height))
     writer.write(image)
   writer.release()
def test_different_gammas(filename, y_array = np.linspace(0, 1, 5)):
    """
    Separate an music sample into percussions (drums) and harmonics for
    different gamma values.
    Plot the spectrograms for percussions and harmonics for all gamma values,
    as well as the signal-to-noise ratio (SNR) for all gamma values

    For every gamma, ``separate(filename, gamma)`` is run and its results are
    read back from ``output/H.wav`` and ``output/P.wav``.

    @params: filename: string - the name of the audio file
             y_array: 1D numpy array - containing the gamma values to test
    """
    plt.figure(figsize=(12, 10))

    audioOG, srOG = lb.load(filename, sr=None)
    sum_squares_OG = np.sum(audioOG**2)
    residual_energy_H = []
    residual_energy_P = []

    # One grid row per gamma; a fixed five-row grid overflowed as soon as
    # more than five gamma values were supplied.
    n_rows = len(y_array)

    for row, y in enumerate(y_array):
        separate(filename, y)
        audioH, srH = lb.load('output/H.wav', sr=None)
        audioP, srP = lb.load('output/P.wav', sr=None)

        DH = lb.amplitude_to_db(np.abs(lb.stft(audioH)), ref=np.max)
        DP = lb.amplitude_to_db(np.abs(lb.stft(audioP)), ref=np.max)

        # Energy of the difference between the original and each component.
        residual_energy_H.append(np.sum((audioOG - audioH)**2))
        residual_energy_P.append(np.sum((audioOG - audioP)**2))

        plt.subplot(n_rows, 2, 2 * row + 1)
        specshow(DH, y_axis='linear')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Harmonic power spectrogram with gamma = ' + str(y))

        plt.subplot(n_rows, 2, 2 * row + 2)
        specshow(DP, y_axis='linear')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Percussive power spectrogram with gamma = ' + str(y))

    plt.tight_layout()
    plt.show()

    # float64 matches the dtype of the arrays previously grown with np.append.
    sum_squares_OG_minus_H_array = np.array(residual_energy_H, dtype=float)
    sum_squares_OG_minus_P_array = np.array(residual_energy_P, dtype=float)

    signal_to_noise_OG_minus_H = 10*np.log10(sum_squares_OG/sum_squares_OG_minus_H_array)
    signal_to_noise_OG_minus_P = 10*np.log10(sum_squares_OG/sum_squares_OG_minus_P_array)
    plt.plot(y_array, signal_to_noise_OG_minus_H, label='Harmonic')
    plt.plot(y_array, signal_to_noise_OG_minus_P, label='Percussive')
    plt.xlabel('Gamma')
    plt.ylabel('SNR')
    plt.legend()
    plt.title('Signal-to-noise ratio of harmonic and percussive components from file '
              + filename + ' with different gammas')
    plt.show()
Beispiel #28
0
    def plot_chroma_frequencies(self, outside_series=None, outside_sr=None):
        """
        Show the chromagram of the selected audio on a 14x5 inch figure.

        The series and sampling rate are resolved with ``self.select_series``
        and ``self.select_sr``. ``self.get_chroma_frequencies`` supplies the
        hop length (item 0) and the chromagram (item 1).

        :param outside_series: passed to ``self.select_series``
        :param outside_sr: passed to ``self.select_sr``
        :return: None
        """
        series = self.select_series(outside_series)
        rate = self.select_sr(outside_sr)

        chroma_result = self.get_chroma_frequencies(series, rate)
        hop, chroma = chroma_result[0], chroma_result[1]

        plt.figure(figsize=(14, 5))
        specshow(chroma,
                 hop_length=hop,
                 cmap='coolwarm',
                 x_axis='time',
                 y_axis='chroma')
        plt.show()
Beispiel #29
0
def print_chromagram(chromagram):
    """Show ``chromagram`` on a 20x5 inch figure with chroma/time axes.

    :param chromagram: array handed unchanged to ``display.specshow``
    """
    plot_options = {
        'x_axis': 'time',
        'y_axis': 'chroma',
        'cmap': 'coolwarm',
    }
    plt.figure(figsize=(20, 5))
    display.specshow(chromagram, **plot_options)
    plt.show()
    def display_feature(self):
        """Plot five stored feature matrices as a column of titled panels.

        Each panel shows one attribute of ``self`` via ``dp.specshow`` with
        ``sr=self.sr`` and gets its own colorbar; the figure is shown at the
        end.
        """
        # (attribute on self, panel title), in top-to-bottom order
        panels = (
            ('sCentroid', 'Spectral Centroid'),
            ('sContrast', 'Spectral Contrast'),
            ('stft', 'Spectrogram'),
            ('mel_spectrogram', 'Mel_Spectrogram'),
            ('mfcc', 'MFCC'),
        )

        plt.figure(figsize=(6, 8))
        for position, (attribute, heading) in enumerate(panels, start=1):
            plt.subplot(5, 1, position)
            dp.specshow(getattr(self, attribute), sr=self.sr)
            plt.colorbar()
            plt.title(heading)
            plt.tight_layout()

        plt.show()
    def forward(self, text, view=False, jupyter=True):
        """Synthesise ``text`` and return the waveform as a NumPy array.

        The text is converted to a phone tensor, then to a mel spectrogram
        (``self.phone2mel``) and finally to a waveform (``self.mel2wav``), all
        without gradient tracking.

        :param text: input string
        :param view: when true, plot the waveform above the mel spectrogram
        :param jupyter: when true, append 8000 zero samples to the waveform
        :return: waveform as a NumPy array
        """
        with torch.no_grad():
            phones = self.text2phone.string_to_tensor(text).squeeze(
                0).long().to(torch.device(self.device))
            mel = self.phone2mel(
                phones,
                speaker_embedding=self.speaker_embedding).transpose(0, 1)
            wave = self.mel2wav(mel.unsqueeze(0)).squeeze(0).squeeze(0)
            if jupyter:
                wave = torch.cat((wave.cpu(), torch.zeros([8000])), 0)
        if view:
            import matplotlib.pyplot as plt
            import librosa.display as lbd
            fig, ax = plt.subplots(nrows=2, ncols=1)
            ax[0].plot(wave.cpu().numpy())
            lbd.specshow(mel.cpu().numpy(),
                         ax=ax[1],
                         sr=16000,
                         cmap='GnBu',
                         y_axis='mel',
                         x_axis='time',
                         hop_length=256)
            ax[0].set_title(self.text2phone.get_phone_string(text))
            ax[0].yaxis.set_visible(False)
            ax[1].yaxis.set_visible(False)
            plt.subplots_adjust(left=0.05,
                                bottom=0.1,
                                right=0.95,
                                top=.9,
                                wspace=0.0,
                                hspace=0.0)
            plt.show()

        # With jupyter=False the waveform is still on self.device; move it to
        # the CPU first, because .numpy() only works on CPU tensors.
        return wave.cpu().numpy()
        for j in range(0, acts.shape[1]):
            if acts[i-1][j] > filter_threshold and acts[i-1][j] > acts[i][j]:
                acts[i-1][j] += acts[i][j]
                acts[i][j] = 0

    acts[acts < filter_threshold] = 0

# visualisation matters
# Script section: draws the input spectrogram `V` and the learned components
# `comps` on a 2x2 subplot grid. `V`, `comps`, `sr` and `hop_length` must
# already be defined by earlier code.
import matplotlib.pyplot as plt
from librosa.display import specshow
import matplotlib.gridspec as gridspec

# Start from a clean slate: close every figure that is still open.
plt.close('all')

# Top row, spanning both columns: the input spectrogram.
plt.subplot2grid((2, 2), (0, 0), colspan=2)
specshow(V, sr=sr, hop_length=hop_length, n_yticks=25, x_axis='time', y_axis='linear')
plt.colorbar()
plt.title('Input power spectrogram')

#plt.subplot2grid((2, 2), (0,1))
#specshow(W_zero, sr=sr, hop_length=hop_length, n_yticks=25, n_xticks=25, x_axis='frames', y_axis='linear')
##plt.colorbar()
#plt.xlabel('Components')
#plt.title('Initialised Components')

# Bottom-left cell: the learned components, one column per component.
plt.subplot2grid((2, 2), (1,0))
specshow(comps, sr=sr, hop_length=hop_length, n_yticks=25, n_xticks=25, x_axis='frames', y_axis='linear')
#plt.colorbar()
plt.xlabel('Components')
plt.title('Learned Components')
# Script section: fits `model` to `V` and plots the MIDI reference, the input
# and the learned components on a 4x2 subplot grid. `model`, `V`, `H_zero`,
# `W_zero`, `midi_mat` and `sr` must already be defined by earlier code.
# NOTE(review): `W` receives `H_zero` and `H` receives `W_zero` — confirm the
# swapped names are intentional.
comps = model.fit_transform(V, W=H_zero, H=W_zero)
acts = model.components_

#from librosa.decompose import decompose

#comps, acts = decompose(V, n_components=n_components, sort=True)

# visualisation matters
import matplotlib.pyplot as plt
from librosa.display import specshow
import matplotlib.gridspec as gridspec

# Close every figure that is still open before drawing.
plt.close('all')

# Row 0, both columns: the MIDI matrix on a note-labelled axis.
plt.subplot2grid((4, 2), (0,0), colspan=2)
specshow(midi_mat, sr=sr, x_axis='time', y_axis='cqt_note')
plt.title('midi visualization')

# Row 1, both columns: the input, transposed before display.
plt.subplot2grid((4, 2), (1,0), colspan=2)
specshow(V.transpose(), sr=sr, x_axis='time', y_axis='cqt_note')
#plt.ylabel('pitches')
plt.colorbar(format='%+2.0f dB')
plt.title('Input Constant-Q power spectrum')

# Row 2, left cell: the learned components.
plt.subplot2grid((4, 2), (2,0))
specshow(comps, y_axis='cqt_note')
#plt.ylabel('pitches')
plt.xlabel('Index')
plt.title('Learned Components')

# Row 2, right cell: selected here; its content follows outside this view.
plt.subplot2grid((4, 2), (2,1))