def get_mean_percussive_ratio_dbs(feat_files, margin=3.0):
    """Compute the mean percussive-to-total energy ratio (in dB) per file.

    :param feat_files: iterable of paths to .npz feature files containing
        a 'linspec_mag' array (linear spectrogram magnitude)
    :param margin: HPSS separation margin forwarded to hpss()
    :return: tuple of (np.array of per-file mean ratios in dB, list of the
        indices of the files that loaded successfully)
    """
    mean_percussive_ratios_db = []
    idxs = []
    # enumerate() replaces the hand-rolled idx counter that had to be
    # incremented in both the success and the failure branch.
    for idx, feat_file in enumerate(tqdm.tqdm(feat_files)):
        try:
            features = np.load(feat_file)
        except Exception as e:
            print('Skipping {}. {}'.format(feat_file, e))
            continue
        idxs.append(idx)
        # NOTE(review): the phase term reuses the magnitude array
        # ('linspec_mag'); a true complex spectrogram would need the stored
        # phase instead. Preserved as-is — confirm upstream intent.
        D = features['linspec_mag'] * np.exp(1.j * features['linspec_mag'])
        H, P = hpss(D, margin=margin)
        Pm, Pp = magphase(P)
        S, phase = magphase(D)
        P_rms = rmse(S=Pm)
        S_rms = rmse(S=S)
        percussive_ratio = P_rms / S_rms
        mean_percussive_ratio_db = amplitude_to_db(
            np.array([np.mean(percussive_ratio)]))[0]
        mean_percussive_ratios_db.append(mean_percussive_ratio_db)
    return np.array(mean_percussive_ratios_db), idxs
def compute_librosa_features(self, audio_data, feat_name):
    """ Compute feature using librosa methods

    :param audio_data: signal
    :param feat_name: feature to compute
    :return: np array
    """
    # # http://stackoverflow.com/questions/41896123/librosa-feature-tonnetz-ends-up-in-typeerror
    # chroma_cens_feat = chroma_cens(y=audio_data, sr=self.RATE, hop_length=self.FRAME)
    logging.info('Computing {}...'.format(feat_name))
    # Lazily-evaluated dispatch table: only the requested extractor runs.
    extractors = {
        'zero_crossing_rate': lambda: zero_crossing_rate(
            y=audio_data, hop_length=self.FRAME),
        'rmse': lambda: rmse(y=audio_data, hop_length=self.FRAME),
        'mfcc': lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13),
        'spectral_centroid': lambda: spectral_centroid(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
        'spectral_rolloff': lambda: spectral_rolloff(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME,
            roll_percent=0.90),
        'spectral_bandwidth': lambda: spectral_bandwidth(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
    }
    extractor = extractors.get(feat_name)
    # Unknown names fall through to None, matching the original if/elif chain.
    return extractor() if extractor is not None else None
def compute_librosa_features(self, audio_data, feat_name):
    """ Compute feature using librosa methods

    :param audio_data: signal
    :param feat_name: feature to compute
    :return: np array
    """
    # if rmse_feat.shape == (1, 427):
    #     rmse_feat = np.concatenate((rmse_feat, np.zeros((1, 4))), axis=1)
    # Shared keyword bundles keep each guard clause short.
    frame_kwargs = dict(y=audio_data, hop_length=self.FRAME)
    spectral_kwargs = dict(y=audio_data, sr=self.RATE, hop_length=self.FRAME)
    if feat_name == 'zero_crossing_rate':
        return zero_crossing_rate(**frame_kwargs)
    if feat_name == 'rmse':
        return rmse(**frame_kwargs)
    if feat_name == 'mfcc':
        return mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
    if feat_name == 'spectral_centroid':
        return spectral_centroid(**spectral_kwargs)
    if feat_name == 'spectral_rolloff':
        return spectral_rolloff(roll_percent=0.90, **spectral_kwargs)
    if feat_name == 'spectral_bandwidth':
        return spectral_bandwidth(**spectral_kwargs)
def initialize_bpf(filename, filepath, only_show=False, rewrite=False):
    """Analyse a track and write its beat-point file (dat/bpf/<filename>.bpf).

    Converts the input to mono wav, beat-tracks it twice (tight and loose),
    caches plot data under dat/plt/, shows the plot in a child process and,
    unless only_show is set, filters loose beats by onset/RMS strength and
    writes tempo + beat locations + per-beat prominence to the .bpf file.

    :param filename: base name (no extension) used for all dat/* paths
    :param filepath: source path handed to the mono-wav converter
    :param only_show: if True, only display the cached plot data
    :param rewrite: if True, regenerate the cached .plt data even if present
    :return: path of the .bpf file
    """
    # NOTE(review): nesting reconstructed from a single-line source; confirm
    # the extent of the `if not only_show:` block against the original.
    wav_filename = audioconvert.convert_to_monowav(filename, filepath)
    timestart = time.time()
    y, sr = load(wav_filename, dtype="float32", res_type=TYPE)
    print("{LOAD TIME}:%f" % (time.time() - timestart))
    # Tight tracking yields the main (dominant) beat points.
    tempo, beats = beat_track(y=y, tightness=100)
    # Loose tracking (low tightness = weaker snapping, more chaotic output)
    # yields many candidate beat points to filter below.
    tempo1, beats1 = beat_track(y=y, tightness=1)
    onset_envelope = onset_strength(y=y)
    rms_envelope = rmse(y=y)  # RMS energy envelope
    tempo = normalize_tempo(tempo)
    MAX_RMS = np.max(rms_envelope)
    AVERAGE_RMS = np.mean(rms_envelope)
    # Onset strength sampled at every loose beat frame.
    onset_all_beat = []
    frame_all_beat = []
    for beat in beats1:
        onset_all_beat.append(onset_envelope[beat])
        frame_all_beat.append(beat)
    AVERAGE_ONSET = np.mean(onset_all_beat)
    new_frames_list = []
    if not os.path.exists("dat/plt/%s.plt" % filename) or rewrite:
        print("No plt found, initializing...")
        plt_file = open("dat/plt/%s.plt" % filename, mode="w")
        plt_file.write(
            repr((filename, rms_envelope.T.tolist(), onset_all_beat,
                  frame_all_beat, MAX_RMS, AVERAGE_RMS, AVERAGE_ONSET)))
        plt_file.close()
    plt_file = open("dat/plt/%s.plt" % filename, mode="r")
    # HACK: eval() on cached file contents — only tolerable because this
    # file is written by the branch above; never point it at untrusted data.
    plt_file_content = eval(plt_file.read())
    plt_process = Process(target=plt_show, args=plt_file_content)
    plt_process.start()
    if not only_show:
        # Keep a loose beat when it is strong enough in onset strength OR in
        # RMS energy relative to the track-wide statistics.
        for beat in beats1:
            if onset_envelope[beat] > AVERAGE_ONSET / ONSET_DETECT_RATIO \
                    or rms_envelope.T[beat] > MAX_RMS / RMS_RATIO:
                new_frames_list.append(beat)
        print("{MAX_ONSET}:%f" % onset_envelope.max())
        new_beats_frame = np.array(new_frames_list)
        mainbeatlocation = frames_to_time(beats)
        beatlocation = frames_to_time(new_beats_frame).tolist()
        beatmain = []
        # Distance from each kept beat to the nearest main beat measures how
        # "primary" that beat is.
        for beat in beatlocation:
            p = abs(mainbeatlocation - beat)
            # print("%f: %f" % (beat, p.min()))
            beatmain.append(p.min())
        file = open("dat/bpf/%s.bpf" % filename, mode="w")
        file.write(
            repr([tempo, beatlocation, beatmain, mainbeatlocation.tolist()]))
        file.close()
    # Remove the intermediate mono wav once analysis is done.
    if (os.path.exists("dat/%s.wav" % filename)):
        os.remove("dat/%s.wav" % filename)
    return "dat/bpf/%s.bpf" % filename
def __init__(self, name, y, sr, per_order, text):
    """Store the raw audio plus per-frame librosa features for one clip."""
    # librosa feature extractors return a (1, n_frames) array; keep row 0.
    rmse_row = rmse(y)[0]
    bandwidth_row = spectral_bandwidth(y, sr=sr)[0]
    zcr_row = zero_crossing_rate(y)[0]
    self.name = name
    self.audio_timeseries = y
    self.sr = sr
    self.per_order = per_order
    self.text = text
    self.rmse_ = rmse_row
    self.spectral_bandwidth_ = bandwidth_row
    self.zero_crossing_rate_ = zcr_row
    # Assigned later by the labelling step.
    self.label = None
def feature_engineer(self, audio_data):
    """
    Extract features using librosa.feature.

    Each signal is cut into frames, features are computed for each frame
    and averaged with np.mean.  The averaged vector becomes a one-row
    data frame with named columns plus the instance label.

    :param audio_data: the input signal samples with frequency 44.1 kHz
    :return: a one-row pandas DataFrame (one column per feature + 'label')
    """
    zcr_feat = zero_crossing_rate(y=audio_data, hop_length=self.FRAME)
    rmse_feat = rmse(y=audio_data, hop_length=self.FRAME)
    mfcc_feat = mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
    spectral_centroid_feat = spectral_centroid(y=audio_data, sr=self.RATE,
                                               hop_length=self.FRAME)
    spectral_rolloff_feat = spectral_rolloff(y=audio_data, sr=self.RATE,
                                             hop_length=self.FRAME,
                                             roll_percent=0.90)
    spectral_bandwidth_feat = spectral_bandwidth(y=audio_data, sr=self.RATE,
                                                 hop_length=self.FRAME)
    # chroma_cens is deliberately excluded (disabled upstream).
    concat_feat = np.concatenate(
        (
            zcr_feat,
            rmse_feat,
            mfcc_feat,
            spectral_centroid_feat,
            spectral_rolloff_feat,
            spectral_bandwidth_feat), axis=0)
    # Fix: the aggregate is the mean, not the median — the old local name
    # 'median_feat' (and the docstring's "[median]") were misleading.
    mean_feat = np.mean(concat_feat, axis=1, keepdims=True).transpose()
    features_df = pd.DataFrame(data=mean_feat, columns=self.COL, index=None)
    features_df['label'] = self.label
    return features_df
def features(rawsnd, num):
    """Return MFCC+delta features for one sound file (energy in slot 0)."""
    import librosa
    import librosa.feature as lib_feat
    signal, sr = librosa.load(rawsnd, sr=16000)
    # 25 ms analysis window with a 10 ms hop, in samples.
    win = int(sr * 0.025)
    hop = int(sr * 0.010)
    feats = lib_feat.mfcc(y=signal, sr=sr, n_mfcc=num,
                          n_fft=win, hop_length=hop)
    # Replace MFCC 0 with the per-frame RMS energy envelope.
    feats[0] = lib_feat.rmse(y=signal, hop_length=hop, n_fft=win)
    stacked = np.vstack([feats, librosa.feature.delta(feats)])
    stacked /= np.max(np.abs(stacked), axis=0)
    return stacked.T
def get_mir(audio_path): hop_length = 200 # Spectral Flux/Flatness, MFCCs, SDCs spectrogram = madmom.audio.spectrogram.Spectrogram(audio_path, frame_size=2048, hop_size=hop_length, fft_size=4096) # only take 30s snippets to align data audio = madmom.audio.signal.Signal(audio_path, dtype=float, start=0, stop=30) all_features = [] #print(spectrogram.shape) #print(audio.shape) #print('signal sampling rate: {}'.format(audio.sample_rate)) # madmom features all_features.extend([ spectral_flux(spectrogram), superflux(spectrogram), complex_flux(spectrogram) ]) #, MFCC(spectrogram)]) # mfcc still wrong shape as it is a 2 array # librosa features libr_features = [ spectral_centroid(audio, hop_length=hop_length), spectral_bandwidth(audio, hop_length=hop_length), spectral_flatness(audio, hop_length=hop_length), spectral_rolloff(audio, hop_length=hop_length), rmse(audio, hop_length=hop_length), zero_crossing_rate(audio, hop_length=hop_length) ] #, mfcc(audio)]) for libr in libr_features: all_features.append(np.squeeze(libr, axis=0)) # for feature in all_features: # print(feature.shape) X = np.stack(all_features, axis=1)[na, :, :] return X
def plt_show_solo(filename, filepath):
    """Load a track, compute its onset/RMS statistics and display them."""
    wav_filename = audioconvert.convert_to_monowav(filename, filepath)
    load_started = time.time()
    y, sr = load(wav_filename, dtype="float32", res_type=TYPE)
    print("{LOAD TIME}:%f" % (time.time() - load_started))
    # Low tightness lets the tracker report loosely-snapped beat frames.
    tempo1, beats1 = beat_track(y=y, tightness=1)
    onset_envelope = onset_strength(y=y)
    rms_envelope = rmse(y=y)  # RMS energy envelope
    MAX_RMS = np.max(rms_envelope)
    AVERAGE_RMS = np.mean(rms_envelope)
    # Onset strength at each loose beat frame, plus the frames themselves.
    onset_all_beat = [onset_envelope[beat] for beat in beats1]
    frame_all_beat = [beat for beat in beats1]
    AVERAGE_ONSET = np.mean(onset_all_beat)
    plt_show(filename, rms_envelope.T, onset_all_beat, frame_all_beat,
             MAX_RMS, AVERAGE_RMS, AVERAGE_ONSET)
def features(rawsnd, num):
    """Compute num amount of audio features of a sound

    Args:
        rawsnd : array with string paths to .wav files
        num : numbers of mfccs to compute
    Returns:
        Return a num x max_stepsize*32 feature vector
    """
    import librosa
    import librosa.feature as lib_feat
    audio, rate = librosa.load(rawsnd, sr=16000)
    fft_len = int(rate * 0.025)   # 25 ms window
    step = int(rate * 0.010)      # 10 ms hop
    coeffs = lib_feat.mfcc(y=audio, sr=rate, n_mfcc=num,
                           n_fft=fft_len, hop_length=step)
    # The first coefficient is swapped for each frame's RMS energy.
    coeffs[0] = lib_feat.rmse(y=audio, hop_length=step, n_fft=fft_len)
    full = np.vstack([coeffs, librosa.feature.delta(coeffs)])
    full /= np.max(np.abs(full), axis=0)
    return full.T
def feature_engineer(self, audio_data):
    """
    Extract features using librosa.feature.

    Each signal is cut into frames, features are computed for each frame
    and averaged with np.mean.

    :param audio_data: the input signal samples with frequency 44.1 kHz
    :return: tuple (mean feature row vector, label)
    """
    def timed(label, extractor):
        # Log each extractor's wall time; the extraction itself is unchanged.
        logging.info('Computing {}...'.format(label))
        start = timeit.default_timer()
        result = extractor()
        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))
        return result

    zcr_feat = timed('zero_crossing_rate',
                     lambda: zero_crossing_rate(y=audio_data,
                                                hop_length=self.FRAME))
    rmse_feat = timed('rmse',
                      lambda: rmse(y=audio_data, hop_length=self.FRAME))
    mfcc_feat = timed('mfcc',
                      lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13))
    spectral_centroid_feat = timed(
        'spectral centroid',
        lambda: spectral_centroid(y=audio_data, sr=self.RATE,
                                  hop_length=self.FRAME))
    spectral_rolloff_feat = timed(
        'spectral rolloff',
        lambda: spectral_rolloff(y=audio_data, sr=self.RATE,
                                 hop_length=self.FRAME, roll_percent=0.90))
    spectral_bandwidth_feat = timed(
        'spectral bandwidth',
        lambda: spectral_bandwidth(y=audio_data, sr=self.RATE,
                                   hop_length=self.FRAME))
    # chroma_cens stays disabled (librosa tonnetz TypeError upstream):
    # http://stackoverflow.com/questions/41896123/librosa-feature-tonnetz-ends-up-in-typeerror
    concat_feat = np.concatenate((zcr_feat,
                                  rmse_feat,
                                  mfcc_feat,
                                  spectral_centroid_feat,
                                  spectral_rolloff_feat,
                                  spectral_bandwidth_feat
                                  ), axis=0)
    logging.info('Averaging...')
    start = timeit.default_timer()
    mean_feat = np.mean(concat_feat, axis=1, keepdims=True).transpose()
    stop = timeit.default_timer()
    logging.info('Time taken: {0}'.format(stop - start))
    return mean_feat, self.label
# Build the CSV header: filename, five summary features, 20 MFCCs, label.
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

# Write the header once; rows are appended per training file below.
with open('data_training.csv', 'w', newline='') as csv_file:
    csv.writer(csv_file).writerow(header)

sukus = 'banjar_hulu banjar_kuala dayak_bakumpai dayak_ngaju'.split()
for g in sukus:
    for filename in os.listdir(f'data_training/{g}'):
        # Fix: the song path and the CSV row must interpolate the current
        # file name; the previous literal placeholder was broken.
        songname = f'data_training/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = fitur.chroma_stft(y=y, sr=sr)
        spec_cent = fitur.spectral_centroid(y=y, sr=sr)
        spec_bw = fitur.spectral_bandwidth(y=y, sr=sr)
        # Renamed to avoid shadowing the feature-function names.
        rmse_feat = fitur.rmse(y)
        zcr = fitur.zero_crossing_rate(y)
        mfcc_feat = fitur.mfcc(y=y, sr=sr)
        to_append = (f'{filename} {np.mean(chroma_stft)} {np.mean(rmse_feat)}'
                     f' {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(zcr)}')
        for e in mfcc_feat:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        # Fix: use a context manager so the handle is closed every iteration.
        with open('data_training.csv', 'a', newline='') as csv_file:
            csv.writer(csv_file).writerow(to_append.split())
def extract_features(soundwave, sampling_rate, sound_name="test", feature_list=None):
    """
    extracts features with help of librosa

    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param feature_list: names of features to compute; None or an empty list
        means all supported features
    :param sound_name: type of sound, i.e. dog
    :return: np.array of all features for the soundwave
    """
    print("Computing features for ", sound_name)
    # Fix: the original used a mutable default argument (feature_list=[]).
    # None preserves the same "empty means everything" behaviour safely.
    if not feature_list:
        feature_list = ["chroma_stft", "chroma_cqt", "chroma_cens",
                        "melspectrogram", "mfcc", "rmse", "spectral_centroid",
                        "spectral_bandwidth", "spectral_contrast",
                        "spectral_flatness", "spectral_rolloff",
                        "poly_features", "tonnetz", "zero_crossing_rate"]

    # (name, rows, extractor) in the fixed concatenation order; 'rows' only
    # documents how many feature rows each extractor contributes.
    extractors = (
        ("chroma_stft", 12, lambda: feat.chroma_stft(soundwave, sampling_rate)),
        ("chroma_cqt", 12, lambda: feat.chroma_cqt(soundwave, sampling_rate)),
        ("chroma_cens", 12, lambda: feat.chroma_cens(soundwave, sampling_rate)),
        ("melspectrogram", 128, lambda: feat.melspectrogram(soundwave, sampling_rate)),
        ("mfcc", 20, lambda: feat.mfcc(soundwave, sampling_rate)),
        ("rmse", 1, lambda: feat.rmse(soundwave)),
        ("spectral_centroid", 1, lambda: feat.spectral_centroid(soundwave, sampling_rate)),
        ("spectral_bandwidth", 1, lambda: feat.spectral_bandwidth(soundwave, sampling_rate)),
        ("spectral_contrast", 7, lambda: feat.spectral_contrast(soundwave, sampling_rate)),
        ("spectral_flatness", 1, lambda: feat.spectral_flatness(soundwave)),
        ("spectral_rolloff", 1, lambda: feat.spectral_rolloff(soundwave, sampling_rate)),
        ("poly_features", 2, lambda: feat.poly_features(soundwave, sampling_rate)),
        ("tonnetz", 6, lambda: feat.tonnetz(soundwave, sampling_rate)),
        ("zero_crossing_rate", 1, lambda: feat.zero_crossing_rate(soundwave)),
    )
    features = [make() for name, rows, make in extractors
                if name in feature_list]
    return np.concatenate(features)
def rmse_spec(raw):
    """Calculates the root-mean-square energy for each sample in data."""
    energies = []
    for spectrogram in raw:
        energies.append(rmse(S=spectrogram))
    return np.array(energies)
def get_feature_from_librosa(wave_name, window):
    """Extract a frame-level feature matrix from a wav file.

    Features per frame: 12 chroma, 12 MFCCs (coefficient 0 dropped) plus
    deltas and delta-deltas, zero-crossing rate, RMSE, spectral centroid,
    bandwidth, contrast, roll-off and 2 polynomial coefficients.

    :param wave_name: path to the wav file
    :param window: analysis window length in samples (hop is window // 2)
    :return: numpy array of stacked features (transposed at the end; confirm
        the intended frame/feature orientation against callers)
    """
    # Fix: use floor division — on Python 3, window / 2 is a float, which
    # librosa rejects as a hop length. Identical result on integer Python 2.
    hop = window // 2
    (rate, sig) = wav.read(wave_name)
    chroma_stft_feat = feature.chroma_stft(sig, rate, n_fft=window,
                                           hop_length=hop)
    mfcc_feat = feature.mfcc(y=sig, sr=rate, n_mfcc=13, hop_length=hop)
    mfcc_feat = mfcc_feat[1:, :]  # drop MFCC 0 (overall energy)
    d_mfcc_feat = feature.delta(mfcc_feat)
    d_d_mfcc_feat = feature.delta(d_mfcc_feat)
    zero_crossing_rate_feat = feature.zero_crossing_rate(sig,
                                                         frame_length=window,
                                                         hop_length=hop)
    # RMSE from the magnitude spectrogram of a Hann-windowed STFT.
    S = librosa.magphase(
        librosa.stft(sig, hop_length=hop, win_length=window,
                     window='hann'))[0]
    rmse_feat = feature.rmse(S=S)
    centroid_feat = feature.spectral_centroid(sig, rate, n_fft=window,
                                              hop_length=hop)
    bandwith_feat = feature.spectral_bandwidth(sig, rate, n_fft=window,
                                               hop_length=hop)
    contrast_feat = feature.spectral_contrast(sig, rate, n_fft=window,
                                              hop_length=hop)
    # Spectral roll-off frequency per frame.
    rolloff_feat = feature.spectral_rolloff(sig, rate, n_fft=window,
                                            hop_length=hop)
    # Coefficients of an n-th order polynomial fit to each spectrogram column.
    poly_feat = feature.poly_features(sig, rate, n_fft=window,
                                      hop_length=hop)
    feat = numpy.hstack(
        (chroma_stft_feat.T, mfcc_feat.T, d_mfcc_feat.T, d_d_mfcc_feat.T,
         zero_crossing_rate_feat.T, rmse_feat.T,
         centroid_feat.T, bandwith_feat.T, contrast_feat.T, rolloff_feat.T,
         poly_feat.T))
    feat = feat.T
    return feat
def featurize(self):
    """
    Extract features using librosa.feature.

    Converts the raw audio (loaded from self.path and truncated to
    TRUNCLENGTH samples) into a stack of frame-level features — zero
    crossing rate, RMSE, spectral centroid/roll-off/bandwidth and MFCCs —
    and exposes three representations on self:

    * self.raw -- the truncated raw signal, shape (1, TRUNCLENGTH)
    * self.vec -- per-feature means over time, one row
    * self.mat -- the full feature matrix, shape (1, 18, 426)

    :return: self; when the loaded audio is shorter than TRUNCLENGTH the
        method returns early and raw/vec/mat are left untouched
    """
    start = timeit.default_timer()
    logging.debug('Loading Librosa raw audio vector...')
    raw, _ = librosa.load(self.path, sr=self.RATE, mono=True)
    raw = raw[:self.TRUNCLENGTH]
    if len(raw) < self.TRUNCLENGTH:
        logging.info(f"Not featurizing {self.path} because raw vector is "
                     f"too short. `None` will be returned for all data "
                     f"formats.")
        return self
    logging.debug('Computing Zero Crossing Rate...')
    zcr_feat = zero_crossing_rate(y=raw, hop_length=self.FRAME)
    logging.debug('Computing RMSE ...')
    rmse_feat = rmse(y=raw, hop_length=self.FRAME)
    logging.debug('Computing MFCC...')
    mfcc_feat = mfcc(y=raw, sr=self.RATE, n_mfcc=self.N_MFCC)
    logging.debug('Computing spectral centroid...')
    spectral_centroid_feat = spectral_centroid(y=raw, sr=self.RATE,
                                               hop_length=self.FRAME)
    logging.debug('Computing spectral roll-off ...')
    spectral_rolloff_feat = spectral_rolloff(y=raw, sr=self.RATE,
                                             hop_length=self.FRAME,
                                             roll_percent=0.90)
    logging.debug('Computing spectral bandwidth...')
    spectral_bandwidth_feat = spectral_bandwidth(y=raw, sr=self.RATE,
                                                 hop_length=self.FRAME)
    logging.debug('Concatenate all features...')
    mat = np.concatenate((
        zcr_feat,
        rmse_feat,
        spectral_centroid_feat,
        spectral_rolloff_feat,
        spectral_bandwidth_feat,
        mfcc_feat,
    ), axis=0)
    logging.debug(f'Mat shape: {mat.shape}')
    logging.debug(f'Create self.raw...')
    self.raw = raw.reshape(1, -1)
    logging.debug(f'Create self.vec by averaging mat along time dim...')
    self.vec = np.mean(mat, axis=1, keepdims=True).reshape(1, -1)
    logging.debug(f'Vec shape: {self.vec.shape}')
    logging.debug(f'Create self.mat...')
    # Fix: the failure message previously reported the expected dims
    # swapped as (426, 18); the assertion checks for (18, 426).
    assert mat.shape == (18, 426), 'Matrix dims do not match (18, 426)'
    self.mat = mat.reshape(
        1,
        18,
        426,
    )
    stop = timeit.default_timer()
    logging.info('Time taken: {0}'.format(stop - start))
    return self
row = np.concatenate((row, cqt)) sens = np.mean(lf.chroma_cens(thing1[:-1]).T, axis=0) row = np.concatenate((row, sens)) spcent = np.mean(lf.spectral_centroid(thing1[:-1]).T, axis=0) row = np.concatenate((row, spcent)) flatness = np.mean(lf.spectral_flatness(thing1[:-1]).T, axis=0) row = np.concatenate((row, flatness)) rolloff = np.mean(lf.spectral_rolloff(thing1[:-1]).T, axis=0) row = np.concatenate((row, rolloff)) mspec = np.mean(lf.melspectrogram(thing1[:-1]).T, axis=0) row = np.concatenate((row, mspec)) mfcc = np.mean(lf.mfcc(thing1[:-1], n_mfcc=30).T, axis=0) row = np.concatenate((row, mfcc)) tonnetz = np.mean(lf.tonnetz(thing1[:-1]).T, axis=0) row = np.concatenate((row, tonnetz)) rmse = np.mean(lf.rmse(thing1[:-1]).T, axis=0) row = np.concatenate((row, rmse)) contrast = np.mean(lf.spectral_contrast(thing1[:-1]).T, axis=0) row = np.concatenate((row, contrast)) tempo = np.mean(lf.tempogram(thing[:-1], win_length=88).T, axis=0) row = np.concatenate((row, tempo)) row = np.append(row, thing1[-1]) #print(len(row)) train_data = np.append(train_data, row) counter += 1 columns = ["feat_" + str(i) for i in range(299)] columns.append("class") df_train2 = pd.DataFrame(columns=columns)