Code example #1
    def compute_librosa_features(self, audio_data, feat_name):
        """
        Compute feature using librosa methods

        :param audio_data: signal
        :param feat_name: feature to compute
        :return: np array
        """

        # if rmse_feat.shape == (1, 427):
        #     rmse_feat = np.concatenate((rmse_feat, np.zeros((1, 4))), axis=1)

        if feat_name == 'zero_crossing_rate':
            return zero_crossing_rate(y=audio_data, hop_length=self.FRAME)
        elif feat_name == 'rmse':
            return rmse(y=audio_data, hop_length=self.FRAME)
        elif feat_name == 'mfcc':
            return mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
        elif feat_name == 'spectral_centroid':
            return spectral_centroid(y=audio_data,
                                     sr=self.RATE,
                                     hop_length=self.FRAME)
        elif feat_name == 'spectral_rolloff':
            return spectral_rolloff(y=audio_data,
                                    sr=self.RATE,
                                    hop_length=self.FRAME,
                                    roll_percent=0.90)
        elif feat_name == 'spectral_bandwidth':
            return spectral_bandwidth(y=audio_data,
                                      sr=self.RATE,
                                      hop_length=self.FRAME)
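Note: the rmse call above relies on the old librosa name; librosa.feature.rmse was renamed to rms in librosa 0.7 and the alias was later removed. A minimal compatibility sketch (an assumption about your installed librosa version, not part of the original project):

# Compatibility shim so the snippet above works on either side of the rename.
try:
    from librosa.feature import rms as rmse   # librosa >= 0.7
except ImportError:
    from librosa.feature import rmse          # older librosa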
Code example #2
def findTimbral(wave):  # 29 timbral feature values (means/variances plus low energy)
    timbral_feature = {}

    centroid = feature.spectral_centroid(wave)
    timbral_feature['mu_centroid'] = np.mean(centroid)
    timbral_feature['var_centroid'] = np.var(centroid, ddof=1)

    rolloff = feature.spectral_rolloff(wave)
    timbral_feature['mu_rolloff'] = np.mean(rolloff)
    timbral_feature['var_rolloff'] = np.var(rolloff, ddof=1)

    flux = onset_strength(wave, lag=1)  # spectral flux
    timbral_feature['mu_flux'] = np.mean(flux)
    timbral_feature['var_flux'] = np.var(flux, ddof=1)

    zero_crossing = feature.zero_crossing_rate(wave)
    timbral_feature['mu_zcr'] = np.mean(zero_crossing)
    timbral_feature['var_zcr'] = np.var(zero_crossing)

    mfccs = feature.mfcc(wave, n_mfcc=10)  # 10 MFCCs -> 20 mean/var features
    i = 1
    for coef in mfccs:
        timbral_feature['mu_mfcc' + str(i)] = np.mean(coef)
        timbral_feature['var_mfcc' + str(i)] = np.var(coef, ddof=1)
        i = i + 1

    percent = feature_low_energy(wave)  # 1 dim
    timbral_feature['low_energy'] = percent

    return timbral_feature
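A hedged usage sketch for findTimbral (the audio file name is hypothetical, and onset_strength / feature_low_energy are helpers assumed to be defined in the original project). Note that positional calls such as feature.spectral_centroid(wave) only work on librosa versions before 0.10, which made these arguments keyword-only:

import librosa

# Load at librosa's default 22050 Hz so the sr assumed by the feature calls matches.
y, sr = librosa.load("some_clip.wav", sr=22050)   # hypothetical input file
timbral = findTimbral(y)
print(timbral['mu_centroid'], timbral['var_centroid'])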
Code example #3
    def compute_librosa_features(self, audio_data, feat_name):
        """
        Compute feature using librosa methods

        :param audio_data: signal
        :param feat_name: feature to compute
        :return: np array
        """
        # # http://stackoverflow.com/questions/41896123/librosa-feature-tonnetz-ends-up-in-typeerror
        # chroma_cens_feat = chroma_cens(y=audio_data, sr=self.RATE, hop_length=self.FRAME)

        logging.info('=> Computing {}'.format(feat_name))

        if feat_name == 'zero_crossing_rate':
            return zero_crossing_rate(y=audio_data, hop_length=self.FRAME)
        elif feat_name == 'rmse':
            return rms(y=audio_data, hop_length=self.FRAME)
        elif feat_name == 'mfcc':
            return mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
        elif feat_name == 'spectral_centroid':
            return spectral_centroid(y=audio_data, sr=self.RATE, hop_length=self.FRAME)
        elif feat_name == 'spectral_rolloff':
            return spectral_rolloff(y=audio_data, sr=self.RATE, hop_length=self.FRAME, roll_percent=0.90)
        elif feat_name == 'spectral_bandwidth':
            return spectral_bandwidth(y=audio_data, sr=self.RATE, hop_length=self.FRAME)
Code example #4
def compute_spec_centroid(data):
    """

    :param data:
    :return:
    """
    cents = feature.spectral_centroid(data)[0]
    return cents
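A short usage sketch (the input file is hypothetical); on librosa 0.10+ the call inside compute_spec_centroid would need to pass the signal as a keyword, i.e. spectral_centroid(y=data):

import librosa

# librosa.load defaults to sr=22050, which matches spectral_centroid's default sr.
data, sr = librosa.load("clip.wav")
cents = compute_spec_centroid(data)
print(cents.shape)   # one centroid value (in Hz) per frame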
Code example #5
def get_files_centroid(tracks):
    output_tracks = {}
    for track in tracks:
        y, sr = librosa.load(track)
        centroid = spectral_centroid(y, sr)
        nth_track, track_name = extract_track_name(track)
        output_tracks[nth_track] = centroid
    return output_tracks
Code example #6
    def test_spectral_centroid(self):
        correct = rosaft.spectral_centroid(y=self.sig,
                                           sr=self.fs,
                                           S=None,
                                           n_fft=nfft,
                                           hop_length=stepsize)
        actual = spectral_centroid(self.args)

        self.assertTrue(np.abs(correct - actual).max() < tol)
Code example #7
def spectral_centroid(args):
    psd = get_psd(args)
    fs, nfft, noverlap = unroll_args(args, ['fs', 'nfft', 'noverlap'])
    hopsize = nfft - noverlap
    return rosaft.spectral_centroid(y=None,
                                    sr=fs,
                                    S=psd,
                                    n_fft=nfft,
                                    hop_length=hopsize)
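The wrapper above hands a precomputed spectrogram to librosa through the S= argument. As a sanity check, a small sketch (assumed parameters and a hypothetical input file, not taken from this project) showing that the signal path and the precomputed-spectrogram path agree when S is the magnitude spectrogram with matching n_fft and hop_length:

import numpy as np
import librosa

y, sr = librosa.load("clip.wav", sr=None)   # hypothetical input file
n_fft, hop_length = 2048, 512
S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))

from_signal = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length)
from_spec = librosa.feature.spectral_centroid(S=S, sr=sr, n_fft=n_fft, hop_length=hop_length)
assert np.allclose(from_signal, from_spec)   # same frames, same centroids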
Code example #8
def get_spectral_features(x,
                          fs,
                          fmin=[],
                          fmax=[],
                          nfft=2048,
                          do_plot=False,
                          logscale=1):
    if fmin and fmax:
        spect_centroid = np.mean(
            feature.spectral_centroid(x,
                                      fs,
                                      n_fft=nfft,
                                      freq=np.linspace(fmin, fmax,
                                                       1 + int(nfft / 2))))
        spect_rolloff = np.mean(
            feature.spectral_rolloff(x,
                                     fs,
                                     n_fft=nfft,
                                     freq=np.linspace(fmin, fmax,
                                                      1 + int(nfft / 2))))
    else:
        spect_centroid = np.mean(feature.spectral_centroid(x, fs, n_fft=nfft))
        spect_rolloff = np.mean(feature.spectral_rolloff(x, fs, n_fft=nfft))
    peaks_freq, peak_amps, pxx_db, freqs = find_spectrum_peaks(
        x, fs, fmin, fmax, nfft)
    n_peaks = peaks_freq.size
    if do_plot:
        colors = sns.color_palette(n_colors=3)
        f = plt.figure()
        ax = f.add_subplot(111)
        ax.plot(freqs, pxx_db, color=colors[0])
        ax.axvline(spect_centroid, color=colors[2])
        ax.scatter(peaks_freq, peak_amps, color=colors[1])
        # ax.axvline(spect_rolloff)
        ax.autoscale(axis="x", tight=True)
        ax.set(xlabel='Frequency (Hz)',
               ylabel='Gain (dB)',
               title='Spectral Features')
        if logscale:
            ax.set_xscale('log')
            ax.grid(True, which="both", ls="-")
        plt.legend(['Pxx (dB)', 'Spectral Centroid', 'Spectral Peaks'])
    return spect_centroid, spect_rolloff, peaks_freq, pxx_db, freqs
Code example #9
File: audio_features.py  Project: dj-d/MusicGenRetor
    def get_spectral_centroid(self, outside_series=None, outside_sr=None):
        """

        :return:
        """

        y = self.select_series(outside_series)
        sr = self.select_sr(outside_sr)

        return spectral_centroid(y, sr=sr)[0]
Code example #10
    def feature_engineer(self, audio_data):
        """
        Extract features using librosa.feature.

        Each signal is cut into frames, features are computed for each frame and averaged (mean).
        The resulting numpy array is transformed into a data frame with named columns.

        :param audio_data: the input signal samples, sampled at 44.1 kHz
        :return: a one-row pandas DataFrame of averaged features plus a 'label' column
        """

        zcr_feat = zero_crossing_rate(y=audio_data, hop_length=self.FRAME)

        # rmse_feat = rmse(y=audio_data, hop_length=self.FRAME)

        mfcc_feat = mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)

        spectral_centroid_feat = spectral_centroid(y=audio_data,
                                                   sr=self.RATE,
                                                   hop_length=self.FRAME)

        spectral_rolloff_feat = spectral_rolloff(y=audio_data,
                                                 sr=self.RATE,
                                                 hop_length=self.FRAME,
                                                 roll_percent=0.90)

        spectral_bandwidth_feat = spectral_bandwidth(y=audio_data,
                                                     sr=self.RATE,
                                                     hop_length=self.FRAME)

        # chroma_cens_feat = chroma_cens(y=audio_data, sr=self.RATE, hop_length=self.FRAME)

        concat_feat = np.concatenate(
            (
                zcr_feat,
                # rmse_feat,
                mfcc_feat,
                spectral_centroid_feat,
                spectral_rolloff_feat,
                # chroma_cens_feat
                spectral_bandwidth_feat),
            axis=0)

        mean_feat = np.mean(concat_feat, axis=1, keepdims=True).transpose()

        features_df = pd.DataFrame(data=mean_feat,
                                   columns=self.COL,
                                   index=None)

        features_df['label'] = self.label

        return features_df
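A hedged follow-up sketch (engineer and signals are hypothetical stand-ins for an instance of the class above and a list of loaded waveforms): since each call returns a one-row DataFrame, a training table can be built by concatenating the rows:

import pandas as pd

rows = [engineer.feature_engineer(sig) for sig in signals]   # hypothetical inputs
dataset = pd.concat(rows, ignore_index=True)
print(dataset.shape)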
Code example #11
    def extract_feature(self, audio_data):
        """
        extract features from audio data
        :param audio_data:
        :return:
        """

        zcr = lrf.zero_crossing_rate(audio_data,
                                     frame_length=self.FRAME,
                                     hop_length=self.FRAME // 2)
        feature_zcr = np.mean(zcr)

        ste = audio_utils.AudioUtils.ste(audio_data, 'hamming',
                                         int(20 * 0.001 * self.RATE))
        feature_ste = np.mean(ste)

        ste_acc = np.diff(ste)
        feature_steacc = np.mean(ste_acc[ste_acc > 0])

        stzcr = audio_utils.AudioUtils.stzcr(audio_data, 'hamming',
                                             int(20 * 0.001 * self.RATE))
        feature_stezcr = np.mean(stzcr)

        mfcc = lrf.mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
        feature_mfcc = np.mean(mfcc, axis=1)

        spectral_centroid = lrf.spectral_centroid(y=audio_data,
                                                  sr=self.RATE,
                                                  hop_length=self.FRAME // 2)
        feature_spectral_centroid = np.mean(spectral_centroid)

        spectral_bandwidth = lrf.spectral_bandwidth(y=audio_data,
                                                    sr=self.RATE,
                                                    hop_length=self.FRAME // 2)
        feature_spectral_bandwidth = np.mean(spectral_bandwidth)

        spectral_rolloff = lrf.spectral_rolloff(y=audio_data,
                                                sr=self.RATE,
                                                hop_length=self.FRAME // 2,
                                                roll_percent=0.90)
        feature_spectral_rolloff = np.mean(spectral_rolloff)

        spectral_flatness = lrf.spectral_flatness(y=audio_data,
                                                  hop_length=self.FRAME // 2)
        feature_spectral_flatness = np.mean(spectral_flatness)

        features = np.append([
            feature_zcr, feature_ste, feature_steacc, feature_stezcr,
            feature_spectral_centroid, feature_spectral_bandwidth,
            feature_spectral_rolloff, feature_spectral_flatness
        ], feature_mfcc)
        return features, self.label
Code example #12
File: main.py  Project: RonaldDijks/sample
def predict(file):

    label_encoder = LabelEncoder()
    label_encoder.classes_ = np.load(os.path.join(outdir, encoder_filename))

    model_json_handle = open(os.path.join(outdir, model_filename), "r")

    model_json = model_json_handle.read()
    model_json_handle.close()

    model = model_from_json(model_json)

    model.load_weights(os.path.join(outdir, model_weights_filename))

    model.compile(loss='categorical_crossentropy',
                  metrics=['accuracy'],
                  optimizer='adam')

    file_path = os.path.join(os.getcwd(), file)

    y, sr = librosa.load(file_path, res_type='kaiser_fast')

    prediction_feature = np.array([get_mfcc(y, sr)])

    predicted_proba_vector = model.predict_proba(prediction_feature)
    predicted_proba = predicted_proba_vector[0]

    fixed_size = 44100

    centroid = spectral_centroid(y=y, sr=sr)
    frequency = np.average(centroid)

    centroid = fix_length(centroid, size=fixed_size)

    length = librosa.get_duration(y=y, sr=sr)

    result = {
        'file_path': file_path,
        'classes': {},
        'position': {
            'frequency': frequency,
            'length': length
        }
    }

    for i in range(len(predicted_proba)):
        category = label_encoder.inverse_transform(np.array([i]))
        result['classes'][category[0]] = format(predicted_proba[i], '.32f')

    return result
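A hedged usage sketch (the wav path is hypothetical, and globals such as outdir, model_filename and get_mfcc are assumed to be defined as in the original project):

result = predict("samples/kick_01.wav")    # hypothetical file path
print(result['position']['frequency'])     # average spectral centroid in Hz
for label, proba in result['classes'].items():
    print(label, proba)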
Code example #13
File: voice_detector.py  Project: tomaskender/SUR
def reduce_noise_power(y, sr):

    cent = spectral_centroid(y=y, sr=sr)

    threshold_h = round(np.median(cent)) * 1.5
    threshold_l = round(np.median(cent)) * 0.1

    less_noise = AudioEffectsChain().lowshelf(
        gain=-30.0, frequency=threshold_l,
        slope=0.8).highshelf(gain=-12.0, frequency=threshold_h,
                             slope=0.5)  #.limiter(gain=6.0)
    y_clean = less_noise(y)

    return y_clean
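A hedged usage sketch for the noise-reduction helper above (assumes the pysndfx package and its SoX backend are installed; the file names are hypothetical):

import librosa
import soundfile as sf

y, sr = librosa.load("noisy.wav", sr=None)
y_clean = reduce_noise_power(y, sr)
sf.write("clean.wav", y_clean, sr)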
Code example #14
def feature_extractor(y, sr):
    print('entered feature_extractor')
    import numpy as np
    from librosa import feature as f

    print('loaded librosa.feature as f')
    rmse = f.rms(y=y)[0]  # f.rmse(y=y) on older librosa
    spec_cent = f.spectral_centroid(y=y, sr=sr)
    spec_bw = f.spectral_bandwidth(y=y, sr=sr)
    rolloff = f.spectral_rolloff(y=y, sr=sr)
    zcr = f.zero_crossing_rate(y)
    mfcc = f.mfcc(y=y, sr=sr)  # mel cepstral coefficients
    chroma = f.chroma_stft(y=y, sr=sr)
    output = np.vstack([rmse, spec_cent, spec_bw, rolloff, zcr, chroma, mfcc]).T
    print('feature_extractor finished')
    return output
Code example #15
def get_mir(audio_path):

    hop_length = 200
    # Spectral Flux/Flatness, MFCCs, SDCs
    spectrogram = madmom.audio.spectrogram.Spectrogram(audio_path,
                                                       frame_size=2048,
                                                       hop_size=hop_length,
                                                       fft_size=4096)
    # only take 30s snippets to align data
    audio = madmom.audio.signal.Signal(audio_path,
                                       dtype=float,
                                       start=0,
                                       stop=30)

    all_features = []

    #print(spectrogram.shape)
    #print(audio.shape)
    #print('signal sampling rate: {}'.format(audio.sample_rate))

    # madmom features
    all_features.extend([
        spectral_flux(spectrogram),
        superflux(spectrogram),
        complex_flux(spectrogram)
    ])  #, MFCC(spectrogram)])

    # mfcc still wrong shape as it is a 2 array

    # librosa features
    libr_features = [
        spectral_centroid(audio, hop_length=hop_length),
        spectral_bandwidth(audio, hop_length=hop_length),
        spectral_flatness(audio, hop_length=hop_length),
        spectral_rolloff(audio, hop_length=hop_length),
        rmse(audio, hop_length=hop_length),
        zero_crossing_rate(audio, hop_length=hop_length)
    ]  #, mfcc(audio)])
    for libr in libr_features:
        all_features.append(np.squeeze(libr, axis=0))
    # for feature in all_features:
    #     print(feature.shape)
    X = np.stack(all_features, axis=1)[na, :, :]
    return X
Code example #16
    def _calc_feat(self, window, feat_name):
        feat = None
        # calculate feature
        if feat_name == 'mfcc':
            feat = FT.mfcc(y=window, sr=self.sr, n_mfcc=_N_MFCC)
        elif feat_name == 'chroma_stft':
            feat = FT.chroma_stft(y=window, sr=self.sr)
        elif feat_name == 'melspectrogram':
            feat = FT.melspectrogram(y=window,
                                     sr=self.sr,
                                     n_mels=128,
                                     n_fft=1024,
                                     hop_length=512)
            feat = L.power_to_db(feat)
        elif feat_name == 'spectral_centroid':
            feat = FT.spectral_centroid(y=window, sr=self.sr)
        elif feat_name == 'spectral_rolloff':
            feat = FT.spectral_rolloff(y=window, sr=self.sr)
        elif feat_name == 'tonnetz':
            feat = FT.tonnetz(y=window, sr=self.sr)
        elif feat_name == 'zero_crossing_rate':
            feat = FT.zero_crossing_rate(y=window)
        else:
            assert False, 'Invalid feature'

        # pool feature from multiple frames
        if self.feature_pool == 'sum':
            feat = feat.sum(axis=1)
        elif self.feature_pool == 'max':
            feat = feat.max(axis=1)
        elif self.feature_pool == 'mean':
            feat = feat.mean(axis=1)
        elif self.feature_pool == 'flatten':
            feat = feat.flatten()
        elif self.feature_pool == 'none':
            pass
        else:
            assert False, 'Invalid feature pooling scheme'

        # normalize features
        if self.l2_norm and feat.shape[0] > 1:
            feat /= np.linalg.norm(feat)
        return feat
Code example #17
    def compute_librosa_features(self, audio_data, feat_name):
        """
        Compute feature using librosa methods

        :param audio_data: signal
        :param feat_name: feature to compute
        :return: np array
        """

        # if rmse_feat.shape == (1, 427):
        #     rmse_feat = np.concatenate((rmse_feat, np.zeros((1, 4))), axis=1)

        if feat_name == 'zero_crossing_rate':
            return zero_crossing_rate(y=audio_data, hop_length=self.FRAME)
        elif feat_name == 'rmse':
            return rmse(y=audio_data, hop_length=self.FRAME)
        elif feat_name == 'mfcc':
            return mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
        elif feat_name == 'spectral_centroid':
            return spectral_centroid(y=audio_data, sr=self.RATE, hop_length=self.FRAME)
        elif feat_name == 'spectral_rolloff':
            return spectral_rolloff(y=audio_data, sr=self.RATE, hop_length=self.FRAME, roll_percent=0.90)
        elif feat_name == 'spectral_bandwidth':
            return spectral_bandwidth(y=audio_data, sr=self.RATE, hop_length=self.FRAME)
Code example #18
    def feature_engineer(self, audio_data):
        """
        Extract features using librosa.feature.

        Each signal is cut into frames, features are computed for each frame and averaged (mean).

        :param audio_data: the input signal samples, sampled at 44.1 kHz
        :return: a tuple (1 x numOfFeatures numpy array of mean features, label)
        """

        logging.info('Computing zero_crossing_rate...')
        start = timeit.default_timer()

        zcr_feat = zero_crossing_rate(y=audio_data, hop_length=self.FRAME)

        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))

        logging.info('Computing rmse...')
        start = timeit.default_timer()

        rmse_feat = rmse(y=audio_data, hop_length=self.FRAME)

        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))

        logging.info('Computing mfcc...')
        start = timeit.default_timer()

        mfcc_feat = mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)

        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))

        logging.info('Computing spectral centroid...')
        start = timeit.default_timer()

        spectral_centroid_feat = spectral_centroid(y=audio_data, sr=self.RATE, hop_length=self.FRAME)

        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))

        logging.info('Computing spectral rolloff...')
        start = timeit.default_timer()

        spectral_rolloff_feat = spectral_rolloff(y=audio_data, sr=self.RATE, hop_length=self.FRAME, roll_percent=0.90)

        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))

        logging.info('Computing spectral bandwidth...')
        start = timeit.default_timer()

        spectral_bandwidth_feat = spectral_bandwidth(y=audio_data, sr=self.RATE, hop_length=self.FRAME)

        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))

        # logging.info('Computing chroma cens...')
        # start = timeit.default_timer()
        #
        # # http://stackoverflow.com/questions/41896123/librosa-feature-tonnetz-ends-up-in-typeerror
        # chroma_cens_feat = chroma_cens(y=audio_data, sr=self.RATE, hop_length=self.FRAME)
        #
        # stop = timeit.default_timer()
        # logging.info('Time taken: {0}'.format(stop - start))

        concat_feat = np.concatenate((zcr_feat,
                                      rmse_feat,
                                      mfcc_feat,
                                      spectral_centroid_feat,
                                      spectral_rolloff_feat,
                                      # chroma_cens_feat,
                                      spectral_bandwidth_feat
                                      ), axis=0)

        logging.info('Averaging...')
        start = timeit.default_timer()

        mean_feat = np.mean(concat_feat, axis=1, keepdims=True).transpose()

        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))

        return mean_feat, self.label
Code example #19
def get_spectral_features(x,
                          fs,
                          fmin=[],
                          fmax=[],
                          nfft=2048,
                          do_plot=False,
                          logscale=True):
    """ Compute some spectral features using the `librosa <https://librosa.github.io/librosa/index.html>`_ library :
     * Spectrum centroid
     * Spectrum rolloff
     * Peaks in the power spectral density

    Parameters
    ----------
    x : array
        Input array. Must be 1D.
    fs : float
        Sampling frequency (Hz)
    fmin : float
        Minimum frequency (Hz)
    fmax : float
        Maximum frequency (Hz)
    nfft : int
        Number of points for the FFT - Default: 2048
    do_plot : bool
        If true, plot the spectral features - Default: False
    logscale : bool
        If True, use a log scale for the x-axis - Default: True

    Returns
    -------
    spect_centroid : float
        Spectrum centroid. See :func:`librosa.feature.spectral_centroid`
    spect_rolloff : float
        Spectrum rolloff. See :func:`librosa.feature.spectral_rolloff`
    peaks_freq : array
        Peaks in the spectrum
    pxx_db : array
        Power Spectral Density (PSD), in dB
    freqs : array
        Frequency associated with the PSD

    """
    x = np.array(x)
    if x.ndim > 1:
        raise ValueError('Input x must be 1D')
    if not HAS_LIBROSA:
        raise ImportError('Librosa is not installed/available')
    if fmin and fmax:
        spect_centroid = np.mean(
            feature.spectral_centroid(x,
                                      fs,
                                      n_fft=nfft,
                                      freq=np.linspace(fmin, fmax,
                                                       1 + int(nfft / 2))))
        spect_rolloff = np.mean(
            feature.spectral_rolloff(x,
                                     fs,
                                     n_fft=nfft,
                                     freq=np.linspace(fmin, fmax,
                                                      1 + int(nfft / 2))))
    else:
        spect_centroid = np.mean(feature.spectral_centroid(x, fs, n_fft=nfft))
        spect_rolloff = np.mean(feature.spectral_rolloff(x, fs, n_fft=nfft))
    peaks_freq, peak_amps, pxx_db, freqs = find_spectrum_peaks(
        x, fs, fmin, fmax, nfft)
    # n_peaks = peaks_freq.size
    if do_plot:
        colors = sns.color_palette(n_colors=3)
        f = plt.figure()
        ax = f.add_subplot(111)
        ax.plot(freqs, pxx_db, color=colors[0])
        ax.axvline(spect_centroid, color=colors[2])
        ax.scatter(peaks_freq, peak_amps, color=colors[1])
        # ax.axvline(spect_rolloff)
        ax.autoscale(axis="x", tight=True)
        ax.set(xlabel='Frequency (Hz)',
               ylabel='Gain (dB)',
               title='Spectral Features')
        if logscale:
            ax.set_xscale('log')
            ax.grid(True, which="both", ls="-")
        plt.legend(['Pxx (dB)', 'Spectral Centroid', 'Spectral Peaks'])
    return spect_centroid, spect_rolloff, peaks_freq, pxx_db, freqs
Code example #20
def get_feature_from_librosa(wave_name, window):
    #print wave_name
    (rate, sig) = wav.read(wave_name)

    chroma_stft_feat = feature.chroma_stft(sig,
                                           rate,
                                           n_fft=window,
                                           hop_length=window / 2)
    #print chroma_stft_feat.shape
    mfcc_feat = feature.mfcc(y=sig, sr=rate, n_mfcc=13, hop_length=window / 2)
    mfcc_feat = mfcc_feat[1:, :]
    #print mfcc_feat.shape
    d_mfcc_feat = feature.delta(mfcc_feat)
    #print d_mfcc_feat.shape
    d_d_mfcc_feat = feature.delta(d_mfcc_feat)
    #print d_d_mfcc_feat.shape
    zero_crossing_rate_feat = feature.zero_crossing_rate(sig,
                                                         frame_length=window,
                                                         hop_length=window / 2)
    #print zero_crossing_rate_feat.shape

    S = librosa.magphase(
        librosa.stft(sig,
                     hop_length=window / 2,
                     win_length=window,
                     window='hann'))[0]
    rmse_feat = feature.rmse(S=S)
    #print rmse_feat.shape

    centroid_feat = feature.spectral_centroid(sig,
                                              rate,
                                              n_fft=window,
                                              hop_length=window / 2)
    #print centroid_feat.shape

    bandwith_feat = feature.spectral_bandwidth(sig,
                                               rate,
                                               n_fft=window,
                                               hop_length=window / 2)
    #print bandwith_feat.shape

    contrast_feat = feature.spectral_contrast(sig,
                                              rate,
                                              n_fft=window,
                                              hop_length=window / 2)
    #print contrast_feat.shape
    rolloff_feat = feature.spectral_rolloff(sig,
                                            rate,
                                            n_fft=window,
                                            hop_length=window / 2)  #计算滚降频率
    #print rolloff_feat.shape

    poly_feat = feature.poly_features(sig,
                                      rate,
                                      n_fft=window,
                                      hop_length=window / 2)  # fit an nth-order polynomial to each spectrogram frame
    #print poly_feat.shape
    #==============================================================================
    #     print(chroma_stft_feat.shape)
    #     #print(corr_feat.shape)
    #     print(mfcc_feat.shape)
    #     print(d_mfcc_feat.shape)
    #     print(d_d_mfcc_feat.shape)
    #     print(zero_crossing_rate_feat.shape)
    #     print(rmse_feat.shape)
    #     print(centroid_feat.shape)
    #     print(bandwith_feat.shape)
    #     print(contrast_feat.shape)
    #     print(rolloff_feat.shape)
    #     print(poly_feat.shape)
    #==============================================================================
    feat = numpy.hstack(
        (chroma_stft_feat.T, mfcc_feat.T, d_mfcc_feat.T, d_d_mfcc_feat.T,
         zero_crossing_rate_feat.T, rmse_feat.T, centroid_feat.T,
         bandwith_feat.T, contrast_feat.T, rolloff_feat.T, poly_feat.T))
    feat = feat.T
    return feat  # each row holds the features of one frame
Code example #21
def main(aud):
    waves = {}
    sg, mask, data, audio_mask, sample_rate = load_audio(str(aud))
    waves['audio'] = data[audio_mask]
    length = len(data[audio_mask])

    w = myfunc()
    windo = w(length)

    windows = {}
    wave = waves['audio']
    species = 'gens_specie'
    windows[species] = []
    win_len = 6144  # window length in samples (6.144e+03)
    for i in range(0, len(wave) // win_len):
        windows[species].append(wave[i * win_len:(i + 1) * win_len])

#creating df for test audio
    new_dataset_test = pd.DataFrame()
    for species in windows.keys():
        for i in range(0, len(windows[species])):
            data_point = {
                'species': species.split('_')[1],
                'genus': species.split('_')[0]
            }
            #print(type(data_point))
            spec_centroid = feature.spectral_centroid(windows[species][i])[0]
            #print(windows_fixed[species][i])
            chroma = feature.chroma_stft(windows[species][i], sample_rate)
            for j in range(0, 13):
                data_point['spec_centr_' + str(j)] = spec_centroid[j]
                for k in range(0, 12):
                    data_point['chromogram_' + str(k) + "_" +
                               str(j)] = chroma[k, j]
            new_dataset_test = new_dataset_test.append(data_point,
                                                       ignore_index=True)

    #classification of test audio
    features = list(new_dataset.columns)
    features.remove('species')
    features.remove('genus')

    X = new_dataset[features].values
    y = new_dataset['species'].values
    X_test = new_dataset_test[features].values
    y_test = new_dataset_test['species'].values

    NB = naive_bayes.GaussianNB()
    SSS = sklearn.model_selection.StratifiedShuffleSplit(n_splits=5,
                                                         test_size=0.2)

    for train_index, val_index in SSS.split(X, y):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]

        NB.fit(X_train, y_train)
        y_pred = NB.predict(X_test)
    check = pd.DataFrame()
    df = pd.read_csv("/home/megha/Desktop/Audio_website/templates/descr.csv",
                     delimiter=';')
    check = df.loc[df['check'] == y_pred[0]]
    #print(check['Description'])
    #accs.append(sklearn.metrics.accuracy_score(y_pred=y_pred, y_true=y_val))
    return y_pred[0], check
Code example #22
def extract_features(soundwave,sampling_rate,sound_name="test",feature_list=[]):
    """
    extracts features with help of librosa
    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param feature_list: list of features to compute
    :param sound_name: type of sound, i.e. dog
    :return: np.array of all features for the soundwave
    """
    print("Computing features for ",sound_name)

    if len(feature_list)==0:
        feature_list=["chroma_stft","chroma_cqt","chroma_cens","melspectrogram",
                      "mfcc","rmse","spectral_centroid","spectral_bandwidth",
                      "spectral_contrast","spectral_flatness","spectral_rolloff",
                      "poly_features","tonnetz","zero_crossing_rate"]

    features=[]


    #feature_len
    #"chroma_stft":12
    if "chroma_stft" in feature_list:
        features.append(feat.chroma_stft(soundwave, sampling_rate))

    #"chroma_cqt":12
    if "chroma_cqt" in feature_list:
        features.append(feat.chroma_cqt(soundwave, sampling_rate))

    #"chroma_cens":12
    if "chroma_cens" in feature_list:
        features.append(feat.chroma_cens(soundwave, sampling_rate))

    #"malspectrogram":128
    if "melspectrogram" in feature_list:
        features.append(feat.melspectrogram(soundwave, sampling_rate))

    #"mfcc":20
    if "mfcc" in feature_list:
        features.append(feat.mfcc(soundwave, sampling_rate))

    #"rmse":1
    if "rmse" in feature_list:
        features.append(feat.rmse(soundwave))

    #"spectral_centroid":1
    if "spectral_centroid" in feature_list:
        features.append(feat.spectral_centroid(soundwave, sampling_rate))

    #"spectral_bandwidth":1
    if "spectral_bandwidth" in feature_list:
        features.append(feat.spectral_bandwidth(soundwave, sampling_rate))

    #"spectral_contrast":7
    if "spectral_contrast" in feature_list:
        features.append(feat.spectral_contrast(soundwave, sampling_rate))

    #"spectral_flatness":1
    if "spectral_flatness" in feature_list:
        features.append(feat.spectral_flatness(soundwave))

    #"spectral_rolloff":1
    if "spectral_rolloff" in feature_list:
        features.append(feat.spectral_rolloff(soundwave, sampling_rate))

    #"poly_features":2
    if "poly_features" in feature_list:
        features.append(feat.poly_features(soundwave, sampling_rate))

    #"tonnetz":6
    if "tonnetz" in feature_list:
        features.append(feat.tonnetz(soundwave, sampling_rate))

    #"zero_crossing_rate":1
    if "zero_crossing_rate" in feature_list:
        features.append(feat.zero_crossing_rate(soundwave))


    return np.concatenate(features)
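The calls above pass the signal and sampling rate positionally and use the old rmse name; librosa 0.10+ makes these arguments keyword-only and exposes rms instead. A sketch of the same default feature set written against the newer API (an assumption about your installed version, not part of the original code):

import numpy as np
from librosa import feature as feat

def extract_features_v2(soundwave, sampling_rate):
    # keyword-only y=/sr= and rms() for librosa >= 0.10
    features = [
        feat.chroma_stft(y=soundwave, sr=sampling_rate),         # 12 rows
        feat.mfcc(y=soundwave, sr=sampling_rate),                # 20 rows
        feat.rms(y=soundwave),                                   # 1 row
        feat.spectral_centroid(y=soundwave, sr=sampling_rate),   # 1 row
        feat.spectral_bandwidth(y=soundwave, sr=sampling_rate),  # 1 row
        feat.spectral_rolloff(y=soundwave, sr=sampling_rate),    # 1 row
        feat.zero_crossing_rate(y=soundwave),                    # 1 row
    ]
    return np.concatenate(features)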
Code example #23
train_data = np.array([])
for chunk in train_data_reader:
    #print(chunk)
    chunk1 = np.array(chunk)
    for thing in chunk1:
        print(counter)
        thing1 = np.array(thing)
        #print(thing1)
        row = np.array([])
        cstft = np.mean(lf.chroma_stft(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, cstft))
        cqt = np.mean(lf.chroma_cqt(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, cqt))
        sens = np.mean(lf.chroma_cens(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, sens))
        spcent = np.mean(lf.spectral_centroid(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, spcent))
        flatness = np.mean(lf.spectral_flatness(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, flatness))
        rolloff = np.mean(lf.spectral_rolloff(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, rolloff))
        mspec = np.mean(lf.melspectrogram(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, mspec))
        mfcc = np.mean(lf.mfcc(thing1[:-1], n_mfcc=30).T, axis=0)
        row = np.concatenate((row, mfcc))
        tonnetz = np.mean(lf.tonnetz(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, tonnetz))
        rmse = np.mean(lf.rmse(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, rmse))
        contrast = np.mean(lf.spectral_contrast(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, contrast))
Code example #24

header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

file = open('data_training.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
sukus = 'banjar_hulu banjar_kuala dayak_bakumpai dayak_ngaju'.split()
for g in sukus:
    for filename in os.listdir(f'data_training/{g}'):
        songname = f'data_training/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = fitur.chroma_stft(y=y, sr=sr)
        spec_cent = fitur.spectral_centroid(y=y, sr=sr)
        spec_bw = fitur.spectral_bandwidth(y=y, sr=sr)
        rmse = fitur.rmse(y)
        zcr = fitur.zero_crossing_rate(y)
        mfcc = fitur.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)}  {np.mean(zcr)}'
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('data_training.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())
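A hedged follow-up sketch (assumes data_training.csv was produced by the loop above): read the table back and separate the feature columns from the label:

import pandas as pd

df = pd.read_csv('data_training.csv')
X = df.drop(columns=['filename', 'label']).values
y = df['label'].values
print(X.shape, y[:5])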
Code example #25
File: test.py  Project: khacminh181/english-sound
    print(crop_feat.shape)
    print(crop_feat)
    crop_feat = np.pad(crop_feat, (0, maxlen - len(crop_feat)),
                       mode='constant')
    print(crop_feat)
    return crop_feat


features = []
feat = mfcc(y, sr, nfilt=10, winstep=0.02)
for i in range(0, feat.shape[0] - 10, 5):
    print(i)
    x = crop_feature(feat, i, nb_step=10)
    print(x.shape)
    features.append(x)
print("shape {}".format(librosa.feature.rms(y).shape))
centroid = feature.spectral_centroid(y, sr)
print(centroid.shape)
bandwidth = feature.spectral_bandwidth(y, sr)
print(bandwidth.shape)
print(feature.spectral_rolloff(y, sr).shape)
print(feature.rms(y).shape)
name = np.full(bandwidth.shape, "haha")

max = np.concatenate((name.T, centroid.T, bandwidth.T, data.T), axis=1)

#
# test = np.zeros((41, 10))
# print(test.shape)
# test =test[0 : 11]
# print(test.shape)
Code example #26
def feature_extraction_all(signal, sr, n_mfcc, buffer_len,
                           normalization_values):
    """
    Feature extraction interface
    :param signal: Signal
    :param sr: Signal
    :param n_mfcc: Signal
    :param buffer_len: Signal
    :param normalization_values: normalization values of the dataset
    :output features: Array of features

    Features are extracted from the incoming audio signal when an onset is detected.
    """
    features = []
    signal = np.array(signal)

    if signal.size != 0:
        S, phase = librosa.magphase(
            librosa.stft(y=signal,
                         n_fft=buffer_len,
                         hop_length=int(buffer_len / 4)))

        # Mel Frequency cepstral coefficients
        mfcc = feature.mfcc(y=signal,
                            sr=sr,
                            n_mfcc=n_mfcc,
                            n_fft=int(512 * 2),
                            hop_length=int(128 * 2))
        mfcc_mean = np.mean(mfcc, axis=1)
        mfcc_std = np.std(mfcc, axis=1)

        # RMS
        rms = feature.rms(S=S,
                          frame_length=buffer_len,
                          hop_length=int(buffer_len / 4))
        rms_mean = np.mean(rms, axis=1)
        rms_std = np.std(rms, axis=1)

        # Spectral Centroid
        spectral_centroid = feature.spectral_centroid(S=S, sr=sr)
        spectral_centroid_mean = np.mean(spectral_centroid, axis=1)
        spectral_centroid_std = np.std(spectral_centroid, axis=1)

        # Rolloff
        spectral_rolloff = feature.spectral_rolloff(S=S, sr=sr)
        spectral_rolloff_mean = np.mean(spectral_rolloff, axis=1)
        spectral_rolloff_std = np.std(spectral_rolloff, axis=1)

        # Bandwidth
        spectral_bandwidth = feature.spectral_bandwidth(S=S, sr=sr)
        spectral_bandwidth_mean = np.mean(spectral_bandwidth, axis=1)
        spectral_bandwidth_std = np.std(spectral_bandwidth, axis=1)

        # Contrast
        spectral_contrast = feature.spectral_contrast(S=S, sr=sr)
        spectral_contrast_mean = np.mean(spectral_contrast, axis=1)
        spectral_contrast_std = np.std(spectral_contrast, axis=1)

        # Flatness
        spectral_flatness = feature.spectral_flatness(S=S)
        spectral_flatness_mean = np.mean(spectral_flatness, axis=1)
        spectral_flatness_std = np.std(spectral_flatness, axis=1)

        if len(normalization_values) > 1:
            # Duration
            features.append(
                normalize(len(signal), normalization_values['duration']))

            features.extend(
                normalize(
                    mfcc_mean, normalization_values[[
                        'mfcc_mean_1', 'mfcc_mean_2', 'mfcc_mean_3',
                        'mfcc_mean_4', 'mfcc_mean_5', 'mfcc_mean_6',
                        'mfcc_mean_7', 'mfcc_mean_8', 'mfcc_mean_9',
                        'mfcc_mean_10'
                    ]]))

            features.extend(
                normalize(
                    mfcc_std, normalization_values[[
                        'mfcc_std_1', 'mfcc_std_2', 'mfcc_std_3', 'mfcc_std_4',
                        'mfcc_std_5', 'mfcc_std_6', 'mfcc_std_7', 'mfcc_std_8',
                        'mfcc_std_9', 'mfcc_std_10'
                    ]]))

            features.extend(
                normalize(rms_mean, normalization_values['rms_mean']))

            features.extend(normalize(rms_std,
                                      normalization_values['rms_std']))

            features.extend(
                normalize(spectral_centroid_mean,
                          normalization_values['spectral_centroid_mean']))

            features.extend(
                normalize(spectral_centroid_std,
                          normalization_values['spectral_centroid_std']))

            features.extend(
                normalize(spectral_rolloff_mean,
                          normalization_values['spectral_rolloff_mean']))

            features.extend(
                normalize(spectral_rolloff_std,
                          normalization_values['spectral_rolloff_std']))

            features.extend(
                normalize(spectral_bandwidth_mean,
                          normalization_values['spectral_bandwidth_mean']))

            features.extend(
                normalize(spectral_bandwidth_std,
                          normalization_values['spectral_bandwidth_std']))

            features.extend(
                normalize(
                    spectral_contrast_mean, normalization_values[[
                        'spectral_contrast_mean_1', 'spectral_contrast_mean_2',
                        'spectral_contrast_mean_3', 'spectral_contrast_mean_4',
                        'spectral_contrast_mean_5', 'spectral_contrast_mean_6',
                        'spectral_contrast_mean_7'
                    ]]))

            features.extend(
                normalize(
                    spectral_contrast_std, normalization_values[[
                        'spectral_contrast_std_1', 'spectral_contrast_std_2',
                        'spectral_contrast_std_3', 'spectral_contrast_std_4',
                        'spectral_contrast_std_5', 'spectral_contrast_std_6',
                        'spectral_contrast_std_7'
                    ]]))

            features.extend(
                normalize(spectral_flatness_mean,
                          normalization_values['spectral_flatness_mean']))

            features.extend(
                normalize(spectral_flatness_std,
                          normalization_values['spectral_flatness_std']))
        else:
            features.append(len(signal))
            features.extend(mfcc_mean)
            features.extend(mfcc_std)
            features.extend(rms_mean)
            features.extend(rms_std)
            features.extend(spectral_centroid_mean)
            features.extend(spectral_centroid_std)
            features.extend(spectral_rolloff_mean)
            features.extend(spectral_rolloff_std)
            features.extend(spectral_bandwidth_mean)
            features.extend(spectral_bandwidth_std)
            features.extend(spectral_contrast_mean)
            features.extend(spectral_contrast_std)
            features.extend(spectral_flatness_mean)
            features.extend(spectral_flatness_std)

        features = np.array(features)
    return features
Code example #27
File: features.py  Project: timrappold/WeeBro
    def featurize(self):
        """
        Extract features using librosa.feature. Convert wav vec, the sound
        amplitude as a function of time, to a variety of extracted features,
        such as Mel Frequency Cepstral Coeffs, Root Mean Square Energy, Zero
        Crossing Rate, etc.

        :param observations
        :ptype: list of tuples (label, wav vec, sampling rate)
        :return:
        :rtype:

        Each signal is cut into frames, features are computed for each frame and averaged [median].
        The numpy array is transformed into a data frame with named columns.
        :param raw: the input signal samples with frequency 44.1 kHz
        :return: a numpy array (numOfFeatures x numOfShortTermWindows)
        """

        start = timeit.default_timer()

        logging.debug('Loading Librosa raw audio vector...')

        raw, _ = librosa.load(self.path, sr=self.RATE, mono=True)
        raw = raw[:self.TRUNCLENGTH]

        if len(raw) < self.TRUNCLENGTH:
            logging.info(f"Not featurizing {self.path} because raw vector is "
                         f"too short. `None` will be returned for all data "
                         f"formats.")
            return self

        logging.debug('Computing Zero Crossing Rate...')
        zcr_feat = zero_crossing_rate(y=raw, hop_length=self.FRAME)

        logging.debug('Computing RMSE ...')
        rmse_feat = rmse(y=raw, hop_length=self.FRAME)

        logging.debug('Computing MFCC...')
        mfcc_feat = mfcc(y=raw, sr=self.RATE, n_mfcc=self.N_MFCC)

        logging.debug('Computing spectral centroid...')
        spectral_centroid_feat = spectral_centroid(y=raw,
                                                   sr=self.RATE,
                                                   hop_length=self.FRAME)

        logging.debug('Computing spectral roll-off ...')
        spectral_rolloff_feat = spectral_rolloff(y=raw,
                                                 sr=self.RATE,
                                                 hop_length=self.FRAME,
                                                 roll_percent=0.90)

        logging.debug('Computing spectral bandwidth...')
        spectral_bandwidth_feat = spectral_bandwidth(y=raw,
                                                     sr=self.RATE,
                                                     hop_length=self.FRAME)

        logging.debug('Concatenate all features...')
        mat = np.concatenate((
            zcr_feat,
            rmse_feat,
            spectral_centroid_feat,
            spectral_rolloff_feat,
            spectral_bandwidth_feat,
            mfcc_feat,
        ),
                             axis=0)

        logging.debug(f'Mat shape: {mat.shape}')

        logging.debug(f'Create self.raw...')
        self.raw = raw.reshape(1, -1)

        logging.debug(f'Create self.vec by averaging mat along time dim...')
        self.vec = np.mean(mat, axis=1, keepdims=True).reshape(1, -1)

        logging.debug(f'Vec shape: {self.vec.shape}')

        logging.debug(f'Create self.mat...')
        assert mat.shape == (18, 426), 'Matrix dims do not match (18, 426)'
        self.mat = mat.reshape(
            1,
            18,
            426,
        )

        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))

        return self
Code example #28
def centroid(wave_form, sample_rate, hop_length):
    return feature.spectral_centroid(y=wave_form,
                                     sr=sample_rate,
                                     hop_length=hop_length).T
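A short usage sketch for the wrapper above (the input file is hypothetical); because of the transpose, each row holds the centroid, in Hz, of one analysis frame:

import librosa

y, sr = librosa.load("clip.wav", sr=None)
cent = centroid(y, sr, hop_length=512)
print(cent.shape)   # (n_frames, 1)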