Example #1
0
def show_spectrogram(y, sr, n_fft, nmels, hopl, AW=False):
    """Compute, optionally weight, and display a log-power mel spectrogram.

    Parameters
    ----------
    y : audio signal handed to ``lbr.feature.melspectrogram``.
    sr : sampling rate of ``y``.
    n_fft : FFT size used for the analysis.
    nmels : number of mel bands.
    hopl : hop length, used for the analysis and for the plot's time axis.
    AW : when true, add to every mel band the offset obtained by evaluating
        the callable returned by ``itu_r_468_amplitude_weight_dB()`` at that
        band's frequency.

    Returns
    -------
    The (optionally weighted) log spectrogram that was displayed.
    """
    # Use the caller's n_fft; it was previously ignored in favour of 2048.
    S = lbr.feature.melspectrogram(
        y=y, sr=sr, n_fft=n_fft, hop_length=hopl, n_mels=nmels)

    log_S = lbr.logamplitude(S, ref_power=np.max)

    if AW:
        # Frequencies of the mel bins, spanning 0 .. sr/2.
        mel_freqs = lbr.mel_frequencies(n_mels=nmels, fmin=0, fmax=sr / 2)

        itu_r_468 = itu_r_468_amplitude_weight_dB()

        # One weighting coefficient per bin, broadcast over all frames.
        log_aw = np.array(itu_r_468(mel_freqs))
        log_S = log_S + log_aw[:, np.newaxis]

    # The time axis must use the same hop length as the analysis above
    # (it was previously fixed at 64 regardless of hopl).
    lbr.display.specshow(
        log_S, sr=sr, hop_length=hopl, x_axis='time', y_axis='mel')

    plt.title('mel power spectrogram')
    plt.tight_layout()
    plt.show()

    return log_S
Example #2
0
def F_Mel(fre_f, audio_conf):
    '''
    Resample a spectrum at mel-spaced positions using linear interpolation.

    Input:
        fre_f       : FloatTensor log spectrum, iterated as (frame, bin)
        audio_conf  : dict; "sample_rate" and "window_size" are read
    Output:
        mel_f       : FloatTensor with the same number of columns as fre_f,
                      holding the spectrum interpolated at the mel positions
    '''
    n_mels = fre_f.size(1)
    # Fractional bin position of every mel frequency.
    # NOTE(review): presumably window_size is in seconds so that
    # Hz * window_size is an FFT bin index; confirm against the caller.
    mel_bin = librosa.mel_frequencies(
        n_mels=n_mels, fmin=0,
        fmax=audio_conf["sample_rate"] / 2) * audio_conf["window_size"]
    frames = fre_f.numpy().tolist()
    last_bin = n_mels - 1
    mel_f = []
    for frame in frames:
        mel_f_frame = []
        for pos in mel_bin:
            left = int(math.floor(pos))
            # Clamp the right neighbour: a position that lands exactly on the
            # last bin would otherwise read one element past the frame.
            right = min(left + 1, last_bin)
            # Linear interpolation between the two neighbouring bins.
            value = (frame[right] - frame[left]) * (pos - left) + frame[left]
            mel_f_frame.append(value)
        mel_f.append(mel_f_frame)
    return torch.FloatTensor(mel_f)
Example #3
0
def centroid(spectrum, config=None):
    '''
    Computes spectral centroid feature.

    Parameters
    ----------
    spectrum : np.ndarray [shape=(n_bins, n_frames)]
        Spectrum from which the feature is computed.

    config : dict or None
        Configuration dictionary. For full list of parameters with their
        description, see README file. This function reads 'spectrum.type'
        and, for 'cqt' and 'mel' spectra, 'spectrum.n_bins'. ``None`` is
        treated as an empty dictionary.

    Returns
    -------
    feature : np.ndarray [shape=(n_frames,)]
        Computed spectral centroid feature.
    '''
    # Avoid a shared mutable default argument.
    if config is None:
        config = {}

    # For any other spectrum type freq stays None and librosa falls back to
    # its own default bin frequencies.
    freq = None

    spectrum_type = get(config, 'spectrum.type')
    if spectrum_type == 'cqt':
        freq = librosa.cqt_frequencies(get(config, 'spectrum.n_bins'),
                                       fmin=librosa.note_to_hz('C1'))
    elif spectrum_type == 'mel':
        freq = librosa.mel_frequencies(n_mels=get(config, 'spectrum.n_bins'),
                                       htk=True)

    return librosa.feature.spectral_centroid(S=spectrum, freq=freq)[0]
def apply_mask_to_audio(mask, y, sr):
    """Attenuate mel-spaced frequency bands of ``y`` according to ``mask``.

    Parameters
    ----------
    mask : array with a ``shape`` attribute.
        1-D: one value per band. Band ``i`` lies between consecutive mel
        frequencies from LOWER_FREQUENCY_LIMIT to UPPER_FREQUENCY_LIMIT; the
        signal is blended with its band-stopped version as
        ``m * y + (1 - m) * filtered``.
        2-D: one 1-D mask per row, applied to consecutive windows of ``y``.
    y : audio samples; copied, the input is not modified.
    sr : sampling rate passed to the band-stop filter.

    Returns
    -------
    The masked copy of ``y``. Masks that are neither 1-D nor 2-D leave the
    copy unchanged.
    """
    y = np.copy(y)
    if len(mask.shape) == 1:
        num_features = len(mask)
        # Keyword arguments: recent librosa releases accept fmin/fmax by
        # keyword only, and older releases accept keywords as well.
        freqs = librosa.mel_frequencies(n_mels=num_features + 1,
                                        fmin=LOWER_FREQUENCY_LIMIT,
                                        fmax=UPPER_FREQUENCY_LIMIT)
        bandstop = obspy.signal.filter.bandstop
        for i in range(num_features):
            lower_freq = freqs[i]
            upper_freq = freqs[i + 1]
            mask_value = mask[i]
            filtered = bandstop(y, lower_freq, upper_freq, sr)
            y = mask_value * y + (1 - mask_value) * filtered
    elif len(mask.shape) == 2:
        num_windows = len(mask)
        num_samples = len(y)
        step = int(np.round(num_samples / len(mask)))
        # zip stops at the shorter sequence, so rounding never yields more
        # windows than there are mask rows.
        for window_id, sample_id in zip(range(num_windows),
                                        range(0, num_samples, step)):
            start, end = sample_id, sample_id + step
            y[start:end] = apply_mask_to_audio(mask[window_id], y[start:end],
                                               sr)
    return y
Example #5
0
def _linear_to_mel(num_freq, num_mel, sample_rate):
    """Build a linear-to-mel weight matrix scaled by the inverse band width.

    Args:
        num_freq: number of linear spectrogram bins.
        num_mel: number of mel bands.
        sample_rate: sampling rate; the filterbank spans 0 .. sample_rate / 2.

    Returns:
        The TensorFlow weight matrix with each mel band multiplied by
        ``2 / (upper_edge - lower_edge)``.
    """
    nyquist = sample_rate / 2
    # Band edges must cover the same range as the filterbank below; without
    # explicit limits librosa defaults to 0 .. 11025 Hz, which only matches a
    # 22050 Hz sample rate.
    # NOTE(review): the TensorFlow filterbank follows the HTK mel scale while
    # librosa defaults to Slaney; confirm whether htk=True is wanted here.
    mel_f = librosa.mel_frequencies(num_mel + 2, fmin=0.0, fmax=nyquist)
    enorm = 2.0 / (mel_f[2:num_mel + 2] - mel_f[:num_mel])
    return tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_mel,
        num_spectrogram_bins=num_freq,
        sample_rate=sample_rate,
        lower_edge_hertz=0.0,
        upper_edge_hertz=nyquist) * enorm
Example #6
0
def mel_weight(S, power):
    """Apply perceptual weighting to ``|S| ** power`` and normalise it.

    The frequency vector is cached in the module-level ``_mel_freqs`` and is
    rebuilt whenever its length no longer matches the number of rows of ``S``.

    Args:
        S: spectrogram whose first axis indexes frequency bands.
        power: exponent applied to the magnitude before weighting.

    Returns:
        ``_normalize`` applied to the weighted spectrogram minus
        ``hparams.ref_level_db``.
    """
    global _mel_freqs
    # Rebuild the cache on first use and when S has a different band count;
    # otherwise a stale vector from an earlier call would be reused.
    if _mel_freqs is None or len(_mel_freqs) != S.shape[0]:
        _mel_freqs = librosa.mel_frequencies(S.shape[0], fmin=hparams.fmin)
    S = librosa.perceptual_weighting(np.abs(S)**power,
                                     _mel_freqs,
                                     ref=hparams.ref_level_db)
    S = _normalize(S - hparams.ref_level_db)
    return S
Example #7
0
def get_mel_index(pitch, hparams):
    """Return the index of the mel band whose frequency is nearest ``pitch``.

    ``pitch`` is converted with ``librosa.midi_to_hz`` and compared against
    ``constants.TIMBRE_SPEC_BANDS`` mel frequencies laid out between the
    minimum and maximum timbre pitches.
    """
    lowest_hz = librosa.midi_to_hz(constants.MIN_TIMBRE_PITCH)
    highest_hz = librosa.midi_to_hz(constants.MAX_TIMBRE_PITCH)
    band_hz = librosa.mel_frequencies(constants.TIMBRE_SPEC_BANDS,
                                      fmin=lowest_hz,
                                      fmax=highest_hz,
                                      htk=hparams.spec_mel_htk)

    target_hz = librosa.midi_to_hz(pitch.numpy())
    distance = np.abs(band_hz - target_hz)
    return distance.argmin()
Example #8
0
def mel(sr, n_fft, n_mels=128, fmin=0.0, fmax=None, htk=False):
    """Create a Filterbank matrix to combine FFT bins into Mel-frequency bins

    :usage:
        >>> mel_fb = librosa.filters.mel(22050, 2048)

        >>> # Or clip the maximum frequency to 8KHz
        >>> mel_fb = librosa.filters.mel(22050, 2048, fmax=8000)

    :parameters:
      - sr        : int
          sampling rate of the incoming signal
      - n_fft     : int
          number of FFT components
      - n_mels    : int
          number of Mel bands
      - fmin      : float
          lowest frequency (in Hz)
      - fmax      : float
          highest frequency (in Hz); defaults to sr / 2
      - htk       : bool
          use HTK formula instead of Slaney

    :returns:
      - M         : np.ndarray, shape=(n_mels, 1+ n_fft/2)
          Mel transform matrix

    """

    if fmax is None:
        fmax = sr / 2.0

    # Initialize the weights
    size = int(1 + n_fft / 2)
    n_mels = int(n_mels)
    weights = np.zeros((n_mels, size))

    # Center freqs of each FFT bin
    fftfreqs = np.arange(size, dtype=float) * sr / n_fft

    # 'Center freqs' of mel bands - uniformly spaced between limits.
    # The code below indexes freqs up to n_mels + 1, i.e. it relies on
    # extra=True returning the two additional edge frequencies.
    freqs = librosa.mel_frequencies(n_mels, fmin=fmin, fmax=fmax, htk=htk,
                                    extra=True)

    # Slaney-style mel is scaled to be approx constant energy per channel
    enorm = 2.0 / (freqs[2:n_mels + 2] - freqs[:n_mels])

    # range instead of xrange so the function also runs on Python 3.
    for i in range(n_mels):
        # lower and upper slopes for all bins
        lower = (fftfreqs - freqs[i]) / (freqs[i + 1] - freqs[i])
        upper = (freqs[i + 2] - fftfreqs) / (freqs[i + 2] - freqs[i + 1])

        # .. then intersect them with each other and zero
        weights[i] = np.maximum(0, np.minimum(lower, upper)) * enorm[i]

    return weights
Example #9
0
def melfilter(frames,
              sr,
              n_fft,
              n_mels=128,
              fmin=0.0,
              fmax=None,
              htk=True,
              norm=None):
    """Project ``frames`` onto a triangular mel filterbank.

    The filterbank is built by hand and cross-checked against
    ``librosa.filters.mel`` before it is applied.

    Args:
        frames: array whose last axis has ``1 + n_fft // 2`` entries.
        sr: sampling rate.
        n_fft: FFT size.
        n_mels: number of mel bands.
        fmin: lowest filter frequency.
        fmax: highest filter frequency; defaults to ``sr / 2``.
        htk: forwarded to librosa to select the mel formula.
        norm: only ``None``, ``1`` and ``numpy.inf`` are accepted.

    Returns:
        ``numpy.dot(frames, weights.T)``.

    Raises:
        ParameterError: for an unsupported ``norm``.
        AssertionError: when the two FFT-frequency helpers disagree or the
            hand-built weights differ from librosa's reference.
    """
    if fmax is None:
        fmax = float(sr) / 2
    if norm is not None and norm != 1 and norm != numpy.inf:
        raise ParameterError('Unsupported norm: {}'.format(repr(norm)))

    # Initialize the weights
    n_mels = int(n_mels)
    weights = numpy.zeros((n_mels, int(1 + n_fft // 2)))

    # Center freqs of each FFT bin; both helper implementations must agree.
    fftfreqs = fft_freqs(sr=sr, n_fft=n_fft)
    fftfreqs2 = fft_freqs2(sr=sr, n_fft=n_fft)
    assert fftfreqs.shape == fftfreqs2.shape
    numpy.testing.assert_almost_equal(fftfreqs, fftfreqs2)

    # 'Center freqs' of mel bands - uniformly spaced between limits
    mel_f = librosa.mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk)
    fdiff = numpy.diff(mel_f)

    for i in range(n_mels):
        # Rising and falling slopes of the i-th triangle over all FFT bins.
        lower = (fftfreqs - mel_f[i]) / fdiff[i]
        upper = (mel_f[i + 2] - fftfreqs) / fdiff[i + 1]

        # .. then intersect them with each other and zero
        weights[i] = numpy.maximum(0, numpy.minimum(lower, upper))

    # NOTE(review): norm is forwarded to the reference but never applied to
    # the hand-built weights, so the comparison below can only succeed when
    # librosa's normalisation is a no-op for the given norm; confirm intent.
    # Keyword arguments are used because recent librosa releases no longer
    # accept these parameters positionally.
    refweighs = librosa.filters.mel(sr=sr,
                                    n_fft=n_fft,
                                    n_mels=n_mels,
                                    fmin=fmin,
                                    fmax=fmax,
                                    htk=htk,
                                    norm=norm)
    numpy.testing.assert_allclose(weights, refweighs)

    return numpy.dot(frames, weights.T)
Example #10
0
def showspec(spec):
    r"""Display a spectrogram.

    The time axis is derived from ``constants.sr`` and ``constants.hl``; the
    frequency axis uses ``constants.nb`` mel frequencies starting at
    ``constants.fm``.

    Arguments:
        spec (2d numpy array): The spectrogram to display.
    """
    plt.figure(figsize=(16, 4))
    # sr and hop_length are passed by keyword: recent librosa releases reject
    # them as positional arguments, older ones accept both forms.
    times = librosa.frames_to_time(np.arange(spec.shape[1]),
                                   sr=constants.sr,
                                   hop_length=constants.hl)
    freq = librosa.mel_frequencies(n_mels=constants.nb,
                                   fmin=constants.fm,
                                   htk=constants.htk)
    plt.pcolormesh(times, freq, spec)
Example #11
0
def spectrogram(y, power, pcen=False):
    """Return a normalised, perceptually weighted spectrogram and the raw STFT.

    Args:
        y: audio signal.
        power: exponent applied to the STFT magnitude before weighting.
        pcen: accepted for interface compatibility; not used by this function.

    Returns:
        Tuple ``(weighted, stftS)`` where ``stftS`` is the STFT of the
        unmodified input and ``weighted`` is computed from the (optionally
        pre-emphasised) signal.
    """
    global _mel_freqs
    stftS = librosa.stft(y,
                         n_fft=hparams.fft_size,
                         hop_length=hparams.hop_size)
    if hparams.use_preemphasis:
        S = librosa.stft(preemphasis(y),
                         n_fft=hparams.fft_size,
                         hop_length=hparams.hop_size)
    else:
        # Without pre-emphasis the second transform would be identical to
        # the first, so reuse it instead of recomputing.
        S = stftS
    # Rebuild the cached frequency vector when the band count changes so a
    # stale vector from an earlier call is never reused.
    if _mel_freqs is None or len(_mel_freqs) != S.shape[0]:
        _mel_freqs = librosa.mel_frequencies(S.shape[0], fmin=hparams.fmin)
    _S = librosa.perceptual_weighting(np.abs(S)**power,
                                      _mel_freqs,
                                      ref=hparams.ref_level_db)
    return _normalize(_S - hparams.ref_level_db), stftS
Example #12
0
    def __init__(self,
                 f_min,
                 f_max,
                 n_mels,
                 sigmoid_range=(3, 12),
                 pad_mels=20,
                 band_attention_mode=False):
        """Store the configuration and precompute the mel band frequencies.

        Args:
            f_min: lowest mel frequency.
            f_max: highest mel frequency.
            n_mels: number of mel bands.
            sigmoid_range: stored as given.
            pad_mels: stored as given.
            band_attention_mode: stored as given.
        """
        self.sigmoid_range = sigmoid_range
        self.f_min = f_min
        self.f_max = f_max
        self.n_mels = n_mels
        self.pad_mels = pad_mels
        self.band_attention_mode = band_attention_mode

        # fmin/fmax are keyword-only in recent librosa releases; keywords
        # also work with older ones.
        self.mel_freqs = np.asarray(
            librosa.mel_frequencies(n_mels=n_mels, fmin=f_min, fmax=f_max))
Example #13
0
def fig4301(x, fig_w, fig_h, path_fig=None, verbose=False):
    """Render ``x`` as a heatmap with time and mel-frequency tick labels.

    Args:
        x: 2-D array; it is transposed and flipped vertically before plotting.
        fig_w: figure width.
        fig_h: figure height.
        path_fig: when truthy, the figure is saved to this path.
        verbose: when truthy, the figure is shown without blocking.

    The tick positions are hard-coded: x ticks span 0..500 and are labelled
    0..10, y ticks span 0..64 and are labelled from ``mel_frequencies(64)``.
    NOTE(review): presumably x holds 500 frames (10 s) of 64 mel bands;
    confirm against the caller.
    """
    import librosa

    fig = plt.figure(figsize=(fig_w, fig_h), tight_layout=True)

    ax = sns.heatmap(
        x.T[::-1],
        cbar=True,
        cbar_kws={"pad": .02},
        linewidths=0.0,
        rasterized=True,
        cmap="magma",  #"cubehelix" #"viridis"
    )
    ax.collections[0].colorbar.ax.tick_params(length=0, pad=1)
    ax.tick_params(
        left=False,
        bottom=False,
        length=1,
        pad=1,
        width=1,
    )

    # Fixed tick layout (the ticks chosen by seaborn are not used).
    xticks = np.around(np.linspace(0, 500, 11), decimals=0).astype(int)
    xticklabels = np.around(np.linspace(0, 10, len(xticks)),
                            decimals=0).astype(int)
    yticks = np.around(np.linspace(0, 64, 5), decimals=0).astype(int)[:-1]
    # Labels are in kHz and reversed because the heatmap rows were flipped.
    yticklabels = np.around(
        librosa.mel_frequencies(64)[np.linspace(0, 63, 5).astype(int)] / 1000,
        decimals=0).astype(int)[::-1][:-1]
    ax.set(
        title=None,
        xlabel="Time (s)",
        xticks=xticks,
        xticklabels=xticklabels,
        ylabel="Frequency (kHz)",
        yticks=yticks,
        yticklabels=yticklabels,
    )
    ax.xaxis.set_tick_params(rotation='auto')
    ax.yaxis.set_tick_params(rotation='auto')
    plt.tight_layout()
    if path_fig:
        plt.savefig(path_fig)
    if verbose:
        plt.show(block=False)
    plt.close(fig)
Example #14
0
def prepare_mel_matrix(hparams, rate, return_numpy=True, GPU_backend=False):
    """Create a mel filter matrix, band-width scaled and normalised per band.

    Args:
        hparams: object providing ``num_mel_bins``, ``n_fft``,
            ``mel_lower_edge_hertz`` and ``mel_upper_edge_hertz``.
        rate: sampling rate passed to TensorFlow.
        return_numpy: return ``mel_matrix.numpy()`` instead of the tensor.
        GPU_backend: when false, CUDA devices are hidden through environment
            variables before TensorFlow is imported here.

    Returns:
        The mel matrix as a NumPy array or a TensorFlow tensor.
    """
    # The previous guard looked up "tf" in sys.modules, which is keyed by the
    # real module name and therefore never matched; the body always ran.
    # Running it unconditionally keeps that behaviour and guarantees that
    # `tf` is bound in this scope.
    if not GPU_backend:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
    import tensorflow as tf

    n_bins = hparams.num_mel_bins

    # create a filter to convolve with the spectrogram
    mel_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=n_bins,
        num_spectrogram_bins=int(hparams.n_fft / 2) + 1,
        sample_rate=rate,
        lower_edge_hertz=hparams.mel_lower_edge_hertz,
        upper_edge_hertz=hparams.mel_upper_edge_hertz,
        dtype=tf.dtypes.float32,
        name=None,
    )

    # gets the center frequencies of mel bands (plus the two outer edges)
    mel_f = mel_frequencies(
        n_mels=n_bins + 2,
        fmin=hparams.mel_lower_edge_hertz,
        fmax=hparams.mel_upper_edge_hertz,
    )

    # Slaney-style mel is scaled to be approx constant energy per channel
    # (from librosa)
    band_width = mel_f[2 : n_bins + 2] - mel_f[:n_bins]
    enorm = tf.dtypes.cast(
        tf.expand_dims(tf.constant(2.0 / band_width), 0),
        tf.float32,
    )

    mel_matrix = tf.multiply(mel_matrix, enorm)
    # Normalise every mel band (column) to unit sum.
    mel_matrix = tf.divide(mel_matrix, tf.reduce_sum(mel_matrix, axis=0))
    if return_numpy:
        return mel_matrix.numpy()
    return mel_matrix
def main():
    """Render a 3-D log-mel surface plot for the example named in argv[1].

    Reads ``<name>_img.wav`` from the test_B directory, copies
    ``<name>_label.jpg`` from test_A next to the output, and writes the plot
    to ``output/images/<name>_spec.png``.
    """
    src_dir = "../datasets/room2reverb/test_A/"
    data_dir = "../datasets/room2reverb/test_B/"

    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs("output/images/", exist_ok=True)

    set_style()

    name = sys.argv[1]
    example = os.path.join(data_dir, "%s_img.wav" % name)
    src = os.path.join(src_dir, "%s_label.jpg" % name)
    output = "output/images/%s_spec.png" % name
    src_output = "output/images/%s_input.jpg" % name
    y, sr = soundfile.read(example)

    shutil.copy2(src, src_output)

    # Peak-normalise, then trim leading/trailing near-silence.
    y /= numpy.abs(y).max()
    audible = numpy.where(numpy.abs(y) > 0.00001)
    y = y[audible[0][0]:audible[0][-1]]

    # Pass the file's real sampling rate; the librosa default of 22050 Hz
    # would mislabel the frequency axis for any other rate.
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    n_mels = mel.shape[0]
    # Drop the lowest band from both the data and the frequency axis.
    m = numpy.log(mel + 1e-8)[1:, :]

    fig = pyplot.figure(figsize=(8, 4))
    # fig.gca(projection=...) was removed from matplotlib; add_subplot works
    # on old and new releases.
    ax = fig.add_subplot(111, projection="3d")
    freqs = librosa.mel_frequencies(n_mels=n_mels, fmax=sr / 2)[1:, None]
    times = (numpy.linspace(0, 1, m.shape[1]) * (len(y) / sr))[None, :]
    ax.plot_surface(times, freqs, m, cmap="coolwarm")
    ax.set_ylim(freqs[-1], freqs[0])
    ax.set_zticks([])
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Frequency (Hz)")

    # Make the z/y axis lines and all panes fully transparent. The w_?axis
    # aliases were removed from matplotlib; the plain ?axis attributes refer
    # to the same objects.
    bg = (0, 0, 0, 0)
    ax.zaxis.line.set_color(bg)
    ax.yaxis.line.set_color(bg)
    ax.zaxis.set_pane_color(bg)
    ax.yaxis.set_pane_color(bg)
    ax.xaxis.set_pane_color(bg)
    ax.grid(False)
    pyplot.savefig(output, bbox_inches="tight")
Example #16
0
def F_Mel(fre_f, audio_conf):
    '''
    Resample a spectrum at mel-spaced positions using linear interpolation.

    Input:
        fre_f       : FloatTensor log spectrum, iterated as (frame, bin)
        audio_conf  : dict; "sample_rate" and "window_size" are read
    Output:
        mel_f       : FloatTensor with the same number of columns as fre_f,
                      holding the spectrum interpolated at the mel positions
    '''
    n_mels = fre_f.size(1)
    # Fractional bin position of every mel frequency.
    # NOTE(review): presumably window_size is in seconds so that
    # Hz * window_size is an FFT bin index; confirm against the caller.
    mel_bin = librosa.mel_frequencies(
        n_mels=n_mels, fmin=0,
        fmax=audio_conf["sample_rate"] / 2) * audio_conf["window_size"]
    frames = fre_f.numpy().tolist()
    last_bin = n_mels - 1
    mel_f = []
    for frame in frames:
        mel_f_frame = []
        for pos in mel_bin:
            left = int(math.floor(pos))
            # Clamp the right neighbour: a position that lands exactly on the
            # last bin would otherwise read one element past the frame.
            right = min(left + 1, last_bin)
            # Linear interpolation between the two neighbouring bins.
            value = (frame[right] - frame[left]) * (pos - left) + frame[left]
            mel_f_frame.append(value)
        mel_f.append(mel_f_frame)
    return torch.FloatTensor(mel_f)
Example #17
0
import torch
import librosa

from model.metrics import Lwlrap

# 256 mel-spaced frequencies between 50 Hz and 15 kHz; not referenced again
# in the lines shown here.
mels = librosa.mel_frequencies(n_mels=256, fmin=50, fmax=15000)

# Two (prediction, target) pairs: one row of four scores and a 0/1 target row.
s1 = torch.tensor([[0.5, 0.3, 0.9, 0.1]]), torch.tensor([[0, 0, 1, 0]])
s2 = torch.tensor([[0.5, 0.7, 0.9, 0.1]]), torch.tensor([[0, 1, 0, 0]])

# Feed both samples to the metric, then print the accumulated result.
lwlrap = Lwlrap(None)
lwlrap.update({'prediction': s1[0], 'target': s1[1]})
lwlrap.update({'prediction': s2[0], 'target': s2[1]})

print(lwlrap.compute())
Example #18
0
def showdata(audiobeats,
             duration=None,
             offset=None,
             beatfinder=None,
             device=None,
             showpred=True):
    r"""Displays the data pointed by an AudioBeats object. It shows the onset envelope,
    the onsets, the onsets selected as beats, the ground truth beats, and the spectrogram.

    Arguments:
        audiobeats (AudioBeats): The `AudioBeats` object to display.
        duration (float): To see a smaller portion of the data, this can be set
            to a lower value than the duration of `audiobeats`. Defaults to
            `audiobeats.duration`.
        offset (float): Display only the data starting from `offset` (combine
            with `duration` to see a smaller portion of the data). Defaults
            to 0.
        beatfinder (BeatFinder): A model, if available.
        device (torch.device): The device of the model.
        showpred (bool): To show the predicted beats.
    """

    spec, onsets, isbeat, beats = audiobeats.get_data()
    if duration is None:
        duration = audiobeats.duration
    if offset is None:
        offset = 0

    if showpred:
        pred_beats, _ = audiobeats.predicted_beats()

    # sr / hop_length are passed by keyword: recent librosa releases reject
    # them as positional arguments, older ones accept both forms.
    onsets_times = librosa.frames_to_time(onsets,
                                          sr=constants.sr,
                                          hop_length=constants.hl)

    onsets_selected = onsets[isbeat == 1]
    onsets_selected_times = librosa.frames_to_time(onsets_selected,
                                                   sr=constants.sr,
                                                   hop_length=constants.hl)

    onset_envelope = utils.onset_strength(spec=spec)

    times = librosa.frames_to_time(np.arange(len(onset_envelope)),
                                   sr=constants.sr,
                                   hop_length=constants.hl)

    plt.figure(figsize=(16, 8))
    plt.subplots_adjust(hspace=0)

    # Top strip: ground truth, predictions and the onsets selected as beats.
    plt.subplot(4, 1, 1)
    if audiobeats.beats_file:
        plt.vlines(beats, 2, 3, color='g', label='Ground truth\nbeats')
        plt.ylim(0, 3)
    else:
        plt.ylim(0, 2)
    if showpred:
        plt.vlines(pred_beats, 1, 2, color='b', label='Predicted beats')
        plt.vlines(onsets_selected_times,
                   0,
                   1,
                   color='m',
                   linestyles='-',
                   alpha=1,
                   label='Onsets selected\nas beats')
    else:
        plt.vlines(onsets_selected_times,
                   1,
                   2,
                   color='m',
                   linestyles='-',
                   alpha=1,
                   label='Onsets selected\nas beats')
        plt.ylim(1, 3)
    plt.xlim(offset, offset + duration)
    plt.xticks([], [])
    plt.yticks([], [])
    plt.legend(frameon=True, framealpha=0.75, bbox_to_anchor=(1.15, 1))

    # Second strip: onsets, onset envelope and (if a model is given) the
    # model's per-onset beat probabilities.
    plt.subplot(4, 1, 2)
    plt.vlines(onsets_times,
               0,
               1,
               color='k',
               linestyles='--',
               alpha=0.3,
               label='Onsets')

    if beatfinder:
        probs = audiobeats.probabilities(beatfinder, device)
        plt.vlines(onsets_times,
                   0,
                   probs[onsets],
                   color='r',
                   linewidths=7,
                   alpha=0.25,
                   label='Probability of the\nonset to be a beat')
    else:
        plt.vlines(onsets_selected_times,
                   0,
                   1,
                   color='m',
                   linestyles='--',
                   alpha=1,
                   label='Onsets selected\nas beats')
    plt.plot(times, onset_envelope, label='Onset envelope')
    plt.xlim(offset, offset + duration)
    plt.ylim(0, 1)
    plt.xticks([], [])
    plt.legend(frameon=True, framealpha=0.75, bbox_to_anchor=(1.15, 1))

    # Bottom half: the mel spectrogram.
    plt.subplot(2, 1, 2)
    freq = librosa.mel_frequencies(n_mels=constants.nb,
                                   fmin=constants.fm,
                                   htk=constants.htk)
    plt.pcolormesh(times, freq, spec)
    plt.xlabel('Time [seconds]')
    plt.ylabel('Frequency [Hz]')
    plt.xlim(offset, offset + duration)
Example #19
0
    def show_debug_img(self, mel, orig_mel, rows: pd.DataFrame, t_min, t_max):
        """Show two colour-mapped mel images with labelled annotation boxes.

        Every row of ``rows`` is drawn as a rectangle on ``mel``; rows that
        share the recording id of the first row are also drawn on the
        normalised copy of ``orig_mel``. Blocks in ``cv2.waitKey(0)`` until a
        key is pressed.

        Args:
            mel: mel image; multiplied by 255 and colour-mapped for display.
            orig_mel: original mel image; min-max normalised to 0..255.
            rows: annotation rows read through the ``SampleDataset.k_*`` keys.
            t_min: unused in the current implementation.
            t_max: unused in the current implementation.
        """
        # fmin/fmax are keyword-only in recent librosa releases; keywords
        # also work with older ones.
        mel_frequencies = librosa.mel_frequencies(n_mels=self.n_mels,
                                                  fmin=self.fmin,
                                                  fmax=self.fmax)

        def find_nearest_idx(array, value):
            array = np.asarray(array)
            idx = (np.abs(array - value)).argmin()
            return idx

        def _draw_rect(mel, name, row_t_min, row_t_max, f_min, f_max):
            mel_min = find_nearest_idx(mel_frequencies, f_min)
            mel_max = find_nearest_idx(mel_frequencies, f_max)
            # Images are flipped vertically, so band indices count from the
            # bottom edge.
            mel_min = self.n_mels - mel_min
            mel_max = self.n_mels - mel_max

            x1 = row_t_min
            x2 = row_t_max

            cv2.rectangle(mel, (x1, mel_max), (x2, mel_min), (0, 255, 0))
            cv2.putText(mel, name, (x1, mel_max + 16 - 20),
                        cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0))

        orig_mel = orig_mel[::-1, :]
        mel_norm = np.zeros_like(orig_mel)
        cv2.normalize(orig_mel, mel_norm, 0, 255, cv2.NORM_MINMAX)

        mel_norm = cv2.applyColorMap(mel_norm.astype(np.uint8),
                                     cv2.COLORMAP_MAGMA)
        mel = cv2.applyColorMap((mel[::-1, :] * 255).astype(np.uint8),
                                cv2.COLORMAP_MAGMA)

        orig_mel = cv2.cvtColor(orig_mel, cv2.COLOR_GRAY2BGR)

        draw = False
        first_row_id = None
        for _, row in rows.iterrows():
            # Remember the first recording id; `or` would keep overwriting it
            # if that id happened to be falsy (e.g. 0 or '').
            if first_row_id is None:
                first_row_id = row[SampleDataset.k_recording_id]

            # Convert seconds to spectrogram frame indices.
            row_t_min = int(row[SampleDataset.k_t_min] * self.sampling_rate /
                            self.hop_length)
            row_t_max = int(row[SampleDataset.k_t_max] * self.sampling_rate /
                            self.hop_length)

            draw = True
            name = ('tp|' if row[SampleDataset.k_is_tp] else
                    'fp|') + row[SampleDataset.k_key]
            rect_info = name, row_t_min, row_t_max, row[
                SampleDataset.k_f_min], row[SampleDataset.k_f_max]
            if first_row_id == row[SampleDataset.k_recording_id]:
                _draw_rect(orig_mel, *rect_info)
                _draw_rect(mel_norm, *rect_info)
            _draw_rect(mel, *rect_info)

        if not draw:
            print('Missing draw!')

        cv2.imshow('mel_norm', mel_norm)
        cv2.imshow('mel', mel)
        # Stack the two windows vertically.
        cv2.moveWindow('mel_norm', 0, (orig_mel.shape[0] + 32) * 0)
        cv2.moveWindow('mel', 0, (orig_mel.shape[0] + 32) * 1)
        cv2.waitKey(0)
Example #20
0
# Spectrogram / image configuration.
TIME = CFG.duration
SR = 48000
FMIN = 40
FMAX = SR // 2  # half the sampling rate
IMAGE_WIDTH = 456
IMAGE_HEIGHT = 456
N_MELS = IMAGE_HEIGHT  # one mel band per image row
HOP_SIZE = 512
WINDOW_SIZE = 512 * 6

# Find the minimum f_min and the maximum f_max for each species
species_fmin = traint.groupby("species_id")["f_min"].agg(min).reset_index()
species_fmax = traint.groupby("species_id")["f_max"].agg(max).reset_index()
species_fmin_fmax = pd.merge(species_fmin, species_fmax, on="species_id")

# Frequency of every mel band, used by search_bin to map Hz to a band index.
MEL_FREQ = librosa.mel_frequencies(fmin=FMIN, fmax=FMAX, n_mels=IMAGE_HEIGHT)


def search_bin(value):
    """Return the index just below the first MEL_FREQ entry not less than value.

    Returns 0 when every entry of MEL_FREQ is smaller than ``value`` (or
    MEL_FREQ is empty), and -1 when the very first entry already qualifies.
    """
    for index, freq in enumerate(MEL_FREQ):
        if freq < value:
            continue
        return index - 1
    return 0


# Compute the coordinates after conversion to a mel spectrogram
# https://akifukka.hatenablog.com/entry/text2speech2
Example #21
0
 def mel_frequencies(self) -> List[float]:
     """Return mel frequencies for ``mel_frequency_count`` bands plus two extra points.

     The upper limit is half of ``self.sample_rate``.
     """
     # according to librosa.filters.mel code
     point_count = self.mel_frequency_count + 2
     nyquist = self.sample_rate / 2
     return librosa.mel_frequencies(point_count, fmax=nyquist)
Example #22
0
def main():
    """Streamlit page: waveform, mel spectrogram and spectrum of an upload.

    Nothing is drawn until a file has been uploaded. The sidebar sliders
    select the analysed time range and the analysis parameters.
    """
    st.title('audio visualizer')
    uploaded_file = st.sidebar.file_uploader(
        "audio file upload (only monoral audio!)")

    if uploaded_file is not None:
        wav, sr = librosa.load(uploaded_file, sr=None)
        wav_seconds = int(len(wav) / sr)

        st.write('sampling rate = ', sr, 'Hz')
        st.audio(uploaded_file)

        st.sidebar.title('sound waveform')
        tgt_ranges = st.sidebar.slider("target range(s)", 0, wav_seconds,
                                       (0, wav_seconds))
        st.sidebar.title('melspectrogram')
        hop_len = st.sidebar.slider('hop len',
                                    min_value=128,
                                    max_value=2048,
                                    step=128,
                                    value=1024)
        win_len = st.sidebar.slider('win len',
                                    min_value=512,
                                    max_value=4096,
                                    step=256,
                                    value=2048)
        n_mel = st.sidebar.slider('mel num',
                                  min_value=64,
                                  max_value=256,
                                  step=8,
                                  value=128)
        st.sidebar.title('spectrum')
        ave_win_len = st.sidebar.slider('ave win len',
                                        min_value=2,
                                        max_value=500,
                                        step=2,
                                        value=100)

        # Waveform, decimated by HOP, with the selected range highlighted.
        fig = go.Figure()
        fig.add_trace(go.Scatter(y=wav[::HOP], name="wav"))
        fig.add_vrect(x0=int(tgt_ranges[0] * sr / HOP),
                      x1=int(tgt_ranges[1] * sr / HOP),
                      fillcolor="LightSalmon",
                      opacity=0.5,
                      layer="below",
                      line_width=0)
        fig.update_layout(
            title="sound waveform",
            width=GRAPH_WIDTH,
            height=GRAPH_HEIGHT,
            xaxis=dict(
                tickmode='array',
                tickvals=[1, int(len(wav[::HOP]) / 2),
                          len(wav[::HOP])],
                ticktext=[str(0),
                          str(int(wav_seconds / 2)),
                          str(wav_seconds)],
                title="time(s)"))
        st.plotly_chart(fig)

        wav_element = wav[tgt_ranges[0] * sr:tgt_ranges[1] * sr]

        # melspectrogram
        mel = calc_melspectrogram(wav_element, sr, win_len, hop_len, n_mel)
        # fmin/fmax are keyword-only in recent librosa releases; keywords
        # also work with older ones.
        mel_bins = librosa.mel_frequencies(n_mels=n_mel,
                                           fmin=0,
                                           fmax=int(sr / 2))

        # The image is flipped vertically, so tick labels run from the
        # highest frequency (top) down to 0 (bottom).
        fig = px.imshow(np.flipud(mel), aspect='auto')
        fig.update_layout(
            title="melspectrogram",
            width=GRAPH_WIDTH,
            height=GRAPH_HEIGHT,
            xaxis=dict(showticklabels=False),
            yaxis=dict(tickmode='array',
                       tickvals=[
                           1,
                           int(mel.shape[0] / 4),
                           int(mel.shape[0] / 2),
                           int(mel.shape[0] - 1)
                       ],
                       ticktext=[
                           str(int(mel_bins[int(mel.shape[0] - 1)])),
                           str(int(mel_bins[int(3 * mel.shape[0] / 4)])),
                           str(int(mel_bins[int(mel.shape[0] / 2)])),
                           str(0)
                       ],
                       title="frequency(Hz)"))
        st.write(fig)

        # spectrum
        s_power, freqs = calc_spectrum(wav_element, sr)
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(x=freqs, y=move_ave(s_power, ave_win_len),
                       mode='lines'))
        fig.update_layout(title="spectrum",
                          width=GRAPH_WIDTH,
                          height=GRAPH_HEIGHT,
                          xaxis=dict(title="frequency(Hz)"),
                          yaxis=dict(title="power"))
        st.write(fig)
Example #23
0
                                  })
    return labels, Z, clstrs_full, pc_corr_clstrs




# Use the 'mb' matplotlib style; it is not one of matplotlib's bundled styles,
# so it presumably has to be installed locally (verify).
plt.style.use('mb')

# Larger default font for all figures.
plt.rcParams.update({'font.size': 15})


# In[4]:


from librosa import mel_frequencies
# 48 mel-spaced frequencies with an upper limit of 8 kHz.
mel_freqs = mel_frequencies(48, fmax=8000)

def plot_Ws(Ws, corrs=None, vmax=None, vmin=None):
    """Plot every ``Ws[n]`` (transposed) as an image on a grid five wide.

    Args:
        Ws: array indexed along its first axis; needs ``min``/``max``/``shape``.
        corrs: optional number shown as the figure title with two decimals.
        vmax: upper colour limit; defaults to ``Ws.max()``.
        vmin: lower colour limit; defaults to ``Ws.min()``.

    Returns:
        The created figure.
    """
    # Fill in only the limit that is missing, so an explicitly supplied one
    # is not silently overwritten.
    if vmin is None:
        vmin = Ws.min()
    if vmax is None:
        vmax = Ws.max()
    n_rows = np.ceil(Ws.shape[0] / 5).astype('int')
    fig, axes = plt.subplots(n_rows, 5, figsize=(20, n_rows * 3.75), constrained_layout=True)
    axes = axes.flatten()
    for n in range(Ws.shape[0]):
        mappable = axes[n].imshow(Ws[n].T, aspect='auto', origin='lower', cmap='viridis', vmin=vmin, vmax=vmax)
    # All images share vmin/vmax, so the last one can drive the colour bar.
    plt.colorbar(mappable)
    # Compare against None: a correlation of 0.0 is a valid title, and the
    # truth value of an array would be ambiguous.
    if corrs is not None:
        fig.suptitle('{0:.2f}'.format(corrs))
    return fig