def classification(musicFile, segLength, sr, breakInterval, resultFile,
                   modelFile):

    # Divide the audio file into segments of breakInterval seconds and
    # collect one 40-dim MFCC mean vector per second of audio in data
    data = []
    audioDuration = get_duration(filename=musicFile) // 1.0  # whole seconds
    numSegments = int(audioDuration // breakInterval)
    print(audioDuration)

    # Loading data in size of break interval
    for i in range(numSegments):
        st = time.time()
        offset = i * breakInterval
        y, srp = load(musicFile,
                      sr=sr,
                      duration=breakInterval,
                      offset=offset,
                      res_type='kaiser_fast')
        for j in range(breakInterval):
            offset = j * sr  # advance one second per MFCC vector
            yp = y[offset:(offset + sr)]
            D = np.mean(mfcc(yp, sr=sr, n_mfcc=40), axis=1)
            data.append(D)
        del y
        print(time.time() - st)

    # Loading remaining data
    offset = numSegments * breakInterval
    duration = audioDuration - offset
    y, srp = load(musicFile,
                  sr=sr,
                  duration=duration,
                  offset=offset,
                  res_type='kaiser_fast')
    for i in range(int(duration)):
        offset = i * sr  # advance one second per MFCC vector
        yp = y[offset:(offset + sr)]
        D = np.mean(mfcc(yp, sr=sr, n_mfcc=40), axis=1)
        data.append(D)
    del y

    data = np.array(data)

    # Loading model and classifying file.
    model = load_model(modelFile)

    result = np.argmax(model.predict(data), axis=1)

    # Saving result to resultFile, one predicted class index per line
    with open(resultFile, 'w') as f:
        for i in range(result.shape[0]):
            f.write(str(result[i]) + '\n')
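
A minimal invocation sketch. Every path and value below is a placeholder, not from the source; the model is assumed to be a Keras classifier over the 40-dim MFCC means.

# Usage sketch -- all paths and parameter values are hypothetical.
classification(musicFile='song.wav',      # input audio file
               segLength=1,               # unused by the body above
               sr=22050,                  # librosa's default sample rate
               breakInterval=3600,        # stream one hour at a time
               resultFile='labels.txt',   # one class index per line
               modelFile='model.h5')      # Keras model on disk
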
    def compute_librosa_features(self, audio_data, feat_name):
        """
        Compute feature using librosa methods

        :param audio_data: signal
        :param feat_name: feature to compute
        :return: np array
        """

        # if rmse_feat.shape == (1, 427):
        #     rmse_feat = np.concatenate((rmse_feat, np.zeros((1, 4))), axis=1)

        if feat_name == 'zero_crossing_rate':
            return zero_crossing_rate(y=audio_data, hop_length=self.FRAME)
        elif feat_name == 'rmse':
            return rmse(y=audio_data, hop_length=self.FRAME)
        elif feat_name == 'mfcc':
            return mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
        elif feat_name == 'spectral_centroid':
            return spectral_centroid(y=audio_data,
                                     sr=self.RATE,
                                     hop_length=self.FRAME)
        elif feat_name == 'spectral_rolloff':
            return spectral_rolloff(y=audio_data,
                                    sr=self.RATE,
                                    hop_length=self.FRAME,
                                    roll_percent=0.90)
        elif feat_name == 'spectral_bandwidth':
            return spectral_bandwidth(y=audio_data,
                                      sr=self.RATE,
                                      hop_length=self.FRAME)
Example #3
    def extract(self, audio_file):
        y, sr = librosa.load(audio_file, sr=self.sr)
        D = mfcc(y,
                 sr=self.sr,
                 n_mfcc=self.mfcc_no + 2,
                 n_fft=self.window_size,
                 hop_length=self.hop_size)
        D = D[2:, :]

        feats = []
        timestamps = []
        current_time = 0
        for i in range(0, D.shape[1], self.step):
            d = D[:, i:i + self.w]

            d = d.transpose()

            if d.shape[0] == self.w:
                feats.append(d)
                timestamps.append(current_time)
                current_time = current_time + self.step * self.hop_size / float(
                    self.sr)

        feats = np.array(feats)
        return feats, timestamps
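
The loop above keeps only full w-frame windows. A self-contained check of the same slicing logic on a synthetic matrix (shapes and names here are illustrative, not from the source):

import numpy as np

# 100 MFCC frames x 13 coefficients, window w=20, step=10 -> 9 full windows.
D = np.zeros((13, 100))
w, step = 20, 10
chunks = [D[:, i:i + w].T for i in range(0, D.shape[1], step)
          if D[:, i:i + w].shape[1] == w]
print(np.array(chunks).shape)  # (9, 20, 13)
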
    def compute_librosa_features(self, audio_data, feat_name):
        """
        Compute feature using librosa methods

        :param audio_data: signal
        :param feat_name: feature to compute
        :return: np array
        """
        # # http://stackoverflow.com/questions/41896123/librosa-feature-tonnetz-ends-up-in-typeerror
        # chroma_cens_feat = chroma_cens(y=audio_data, sr=self.RATE, hop_length=self.FRAME)

        logging.info('=> Computing {}'.format(feat_name))

        if feat_name == 'zero_crossing_rate':
            return zero_crossing_rate(y=audio_data, hop_length=self.FRAME)
        elif feat_name == 'rmse':
            return rms(y=audio_data, hop_length=self.FRAME)
        elif feat_name == 'mfcc':
            return mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
        elif feat_name == 'spectral_centroid':
            return spectral_centroid(y=audio_data, sr=self.RATE, hop_length=self.FRAME)
        elif feat_name == 'spectral_rolloff':
            return spectral_rolloff(y=audio_data, sr=self.RATE, hop_length=self.FRAME, roll_percent=0.90)
        elif feat_name == 'spectral_bandwidth':
            return spectral_bandwidth(y=audio_data, sr=self.RATE, hop_length=self.FRAME)
Example #5
def extract_audio_features(root_dir, row):
    raw_data_dir = join(root_dir, RAW_DATA_DIR)
    row_dict = row.to_dict()
    waveform, _ = load(join(raw_data_dir, row_dict['filename']), sr=FEATURE_ARGS['sr'])
    row_dict['melspec'] = _clean_features(melspectrogram(waveform, n_mels=EXTRACTOR_ARGS['n_mels'], **FEATURE_ARGS))
    row_dict['mfcc'] = _clean_features(mfcc(waveform, n_mfcc=EXTRACTOR_ARGS['n_mfcc'], **FEATURE_ARGS))
    return row_dict
def get_features(filename, *, winlen, winstep, n_mcep, mcep_alpha, minf0,
                 maxf0, type):
    wav, sr = load(filename, sr=None)

    # get f0
    x = wav.astype(float)
    _f0, t = world.harvest(x,
                           sr,
                           f0_floor=minf0,
                           f0_ceil=maxf0,
                           frame_period=winstep * 1000)
    f0 = world.stonemask(x, _f0, t, sr)

    window_size = int(sr * winlen)
    hop_size = int(sr * winstep)

    # get mel
    if type == 'mcc':
        spec = world.cheaptrick(x, f0, t, sr, f0_floor=minf0)
        h = sptk.sp2mc(spec, n_mcep - 1, mcep_alpha).T
    else:
        h = mfcc(x, sr, n_mfcc=n_mcep, n_fft=window_size, hop_length=hop_size)
    h = np.vstack((h, f0))
    maxlen = len(x) // hop_size + 2
    h = repeat_last_padding(h, maxlen)
    id = os.path.basename(filename).replace(".wav", "")
    return (id, x, h)
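
A hedged invocation sketch: 'sample.wav' is a placeholder, and the window, order, and f0 values below are common speech defaults, not values taken from the source.

# Usage sketch -- parameters are illustrative defaults.
uid, x, h = get_features('sample.wav',
                         winlen=0.025, winstep=0.005,
                         n_mcep=25, mcep_alpha=0.42,
                         minf0=71.0, maxf0=800.0,
                         type='mcc')
print(h.shape)  # (n_mcep + 1, padded frame count); the last row is f0
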
    def apply(self, data):
        all_ceps = []
        for ch in data:
            ceps, mspec, spec = mfcc(ch)  # three-value mfcc (e.g. scikits.talkbox), not librosa's
            all_ceps.append(ceps.ravel())

        return np.array(all_ceps)
def findTimbral(wave):  # 29 dimensions: 8 spectral stats + 20 MFCC stats + 1 low-energy
    timbral_feature = {}

    centroid = feature.spectral_centroid(wave)
    timbral_feature['mu_centroid'] = np.mean(centroid)
    timbral_feature['var_centroid'] = np.var(centroid, ddof=1)

    rolloff = feature.spectral_rolloff(wave)
    timbral_feature['mu_rolloff'] = np.mean(rolloff)
    timbral_feature['var_rolloff'] = np.var(rolloff, ddof=1)

    flux = onset_strength(wave, lag=1)  # spectral flux
    timbral_feature['mu_flux'] = np.mean(flux)
    timbral_feature['var_flux'] = np.var(flux, ddof=1)

    zero_crossing = feature.zero_crossing_rate(wave)
    timbral_feature['mu_zcr'] = np.mean(zero_crossing)
    timbral_feature['var_zcr'] = np.var(zero_crossing, ddof=1)

    mfcc_coefs = feature.mfcc(wave, n_mfcc=10)  # 10 coefficients -> 20 features
    i = 1
    for coef in mfcc_coefs:
        timbral_feature['mu_mfcc' + str(i)] = np.mean(coef)
        timbral_feature['var_mfcc' + str(i)] = np.var(coef, ddof=1)
        i = i + 1

    percent = feature_low_energy(wave)  # 1 dim
    timbral_feature['low_energy'] = percent

    return timbral_feature
    def process_signal(self, signal):
        # Magnitude STFT -> mel spectrogram -> log power -> MFCCs + deltas
        ft = np.abs(stft(signal, n_fft=self.window_size,
                         hop_length=self.window_stride, window='hann'))
        mel = melspectrogram(sr=self.sample_rate, S=ft)
        mfccs = mfcc(sr=self.sample_rate, n_mfcc=self.num_mfccs,
                     S=power_to_db(mel))  # mfcc's S argument expects log power
        deltas = delta(mfccs)
        delta_deltas = delta(mfccs, order=2)
        return mfccs, deltas, delta_deltas
Example #10
def ConvertAudioToInputArray(audio, sr, num_mfccs, numcontext):
    #   Compute MFCCs and transpose to (timesteps, n_mfcc)
    mfcc_a = mfcc(y=audio, sr=sr, n_mfcc=num_mfccs).T

    #   BiRNN stride = 2: keep every other timestep
    mfcc_a = mfcc_a[::2]

    #   one stride per timestep in the input
    num_strides = len(mfcc_a)

    #   add empty initial and final contexts
    empty_context = np.zeros((numcontext, num_mfccs), dtype=mfcc_a.dtype)
    mfcc_a = np.concatenate((empty_context, mfcc_a, empty_context))

    #   create a view into the array with overlapping strides of size
    #   numcontext (past) + 1 (present) + numcontext (future)
    window_size = 2 * numcontext + 1
    train_inputs = np.lib.stride_tricks.as_strided(
        mfcc_a, (num_strides, window_size, num_mfccs),
        (mfcc_a.strides[0], mfcc_a.strides[0], mfcc_a.strides[1]),
        writeable=False)

    #   Flatten the second and third dimensions
    train_inputs = np.reshape(train_inputs, [num_strides, -1])

    #   Whiten inputs
    #   copy the strided array so we can write to it safely
    train_inputs = np.copy(train_inputs)
    train_inputs = (train_inputs -
                    np.mean(train_inputs)) / np.std(train_inputs)

    #   Return the training data
    return train_inputs
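
The as_strided call builds the overlapping context windows as a zero-copy view. The same trick on a toy array (a sketch, not from the source):

import numpy as np

# 5 timesteps of 3 features, numcontext=1 -> windows of 2*1+1 = 3 timesteps.
numcontext, num_feats = 1, 3
feats = np.arange(15, dtype=np.float64).reshape(5, num_feats)
pad = np.zeros((numcontext, num_feats))
padded = np.concatenate((pad, feats, pad))
windows = np.lib.stride_tricks.as_strided(
    padded, (len(feats), 2 * numcontext + 1, num_feats),
    (padded.strides[0], padded.strides[0], padded.strides[1]),
    writeable=False)
print(windows.shape)  # (5, 3, 3): past/present/future context per timestep
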
Example #11
def predict_2d(data, fs, interval):

    X = np.zeros((1, 20, int(16 * interval), 1))  #Length of mfcc from training

    X[0, :, :, 0] = mfcc(data, fs)

    return classifier.predict(X).argmax(1)
Example #12
def load_training_file(training_data_file_name: str, ret_length=False):
    """
    loads training data from the given file name

    Parameters
    ----------
    training_data_file_name: str
        the name of the data file

    Returns
    -------
    (classname, mfcc):tuple
        the class and data will be returned
    """
    file_data, samplerate = sf.read(training_data_file_name)
    length = len(file_data)
    mfcc = feature.mfcc(file_data,
                        sr=samplerate,
                        n_mfcc=25,
                        hop_length=512,
                        n_fft=2048)
    mfcc = reshape(mfcc, 20)
    if ret_length:
        return (training_data_file_name, mfcc), length
    else:
        return (training_data_file_name, mfcc)
Example #13
    def vowel_predicting(self, model_path):
        model = models.load_model(model_path)

        data = self.data
        data = mfcc(data, self.fs)
        data = np.expand_dims(data, 0)
        data = np.expand_dims(data, 3)

        predictions_single = model.predict(data)
        predictions = predictions_single[0]
        print(predictions)
        t = 0.5
        if predictions[0] > t:
            letter = 'A'
        elif predictions[1] > t:
            letter = 'E'
        elif predictions[2] > t:
            letter = 'I'
        elif predictions[3] > t:
            letter = 'O'
        elif predictions[4] > t:
            letter = 'U'
        elif predictions[5] > t:
            letter = 'Y'
        else:
            letter = '----'
        return letter
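
Because the elif chain tests classes in a fixed order, an earlier vowel wins whenever two scores clear t. A sketch of an argmax-based variant that picks the highest-scoring vowel instead (same six-class layout assumed):

import numpy as np

def pick_vowel(predictions, t=0.5):
    # Take the best-scoring class; fall back to '----' below the threshold.
    vowels = ['A', 'E', 'I', 'O', 'U', 'Y']
    best = int(np.argmax(predictions))
    return vowels[best] if predictions[best] > t else '----'
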
Example #14
    def get_perform_mfcc(self, outside_series=None, outside_sr=None):
        y = self.select_series(outside_series)
        sr = self.select_sr(outside_sr)

        mfccs = mfcc(y, sr=sr)

        return scale(mfccs, axis=1)
Example #15
    def get_seq_size(self, frames, sr):
        """
        Get audio sequence size of audio time series when converted to mfcc-features or mel spectrogram

        :param frames: audio time series
        :param sr: sampling rate of frames
        :return: sequence size of mfcc-converted audio
        """

        if self.type == 'mfcc':
            mfcc_frames = mfcc(frames,
                               sr,
                               n_fft=self.frame_length,
                               hop_length=self.hop_length,
                               n_mfcc=self.mfcc_features,
                               n_mels=self.n_mels)
            return mfcc_frames.shape[1]

        elif self.type == 'spectrogram':
            spectrogram = melspectrogram(frames,
                                         sr,
                                         n_fft=self.frame_length,
                                         hop_length=self.hop_length,
                                         n_mels=self.n_mels)
            return spectrogram.shape[1]

        else:
            raise ValueError('Not a valid feature type: {}'.format(self.type))
Example #16
def extract_features(audio, rate):
    audio = reduce_noise_power(audio, rate)

    audio, indexes = trim(audio)

    mfcc_feature = mfcc(y=audio,
                        sr=rate,
                        n_mfcc=13,
                        n_fft=int(0.025 * rate),
                        n_mels=40,
                        fmin=20,
                        hop_length=int(0.03 * rate))

    mfcc_feature = preprocessing.scale(mfcc_feature, axis=1)

    mfcc_feature = stats.zscore(mfcc_feature)

    pitches, magnitudes = pitch(y=audio,
                                sr=rate,
                                fmin=50,
                                fmax=400,
                                n_fft=int(0.025 * rate),
                                hop_length=int(0.03 * rate))

    #delta_f = delta(mfcc_feature)
    #d_delta_f = delta(mfcc_feature, order=2)
    combined = np.hstack((np.transpose(mfcc_feature), np.transpose(pitches)))
    return combined
def split_audio(wav_path):
    print('splitting audios...')
    dst = os.path.join(wav_path.split('/')[0], 'info')
    os.makedirs(dst, exist_ok=True)  # ffmpeg will not create the segment dir
    with open(os.devnull, 'w') as ffmpeg_log:
        command = 'ffmpeg -i ' + wav_path + ' -f segment -segment_time 1 -c copy ' + os.path.join(dst,'%02d.wav')
        subprocess.call(command, shell=True, stdout=ffmpeg_log, stderr=ffmpeg_log)
    os.remove(wav_path)
    output = np.zeros((20, 0))
    for segment in os.listdir(dst):
        segment = os.path.join(dst, segment)
        sample_rate, audio_info = wavfile.read(segment)
        audio_length = audio_info.shape[0]
        if audio_length<=16000:
            audio_info = np.pad(audio_info, (0, 16000-audio_length), 'constant', constant_values=0)
        else:
            audio_info = audio_info[0:16000]
        audio_info = audio_info.astype(np.float32)
        mfcc_feats = mfcc(audio_info, sr=sample_rate)
        #print(mfcc_feats.shape)
        output = np.concatenate((output, mfcc_feats), axis=1)
    #print(output.shape)
    
    for file in os.listdir(dst):
        if file.endswith('.wav'):
            os.remove(os.path.join(dst, file))

    return output.T
Example #18
def process_audio(audio_data, sr):
    """
    Computes the Mel-Frequency Cepstral Coefficients and their first and second order derivatives, concatenates them
    all into a single numpy array, and then swaps the axes from [n_mfcc, n_samples] to [n_samples, n_mfcc].

    :param audio_data: floating point time series of an audio file
    :param sr: the sample rate at which train_data was loaded
    :return: a feature array of dimension [n_samples, n_mfcc] containing the computed MFCCs and their time
             derivatives
    """
    mel_freq_coeff = mfcc(y=audio_data,
                          sr=sr,
                          n_mfcc=13,
                          hop_length=int(.10 * sr),
                          n_fft=int(.20 * sr))
    mel_freq_coeff = mel_freq_coeff[1:, :]

    mel_freq_coeff_delta = delta(mel_freq_coeff, width=7)
    mel_freq_coeff_delta_delta = delta(mel_freq_coeff, width=7, order=2)

    features = concatenate(
        (mel_freq_coeff, mel_freq_coeff_delta, mel_freq_coeff_delta_delta),
        axis=0)
    features = swapaxes(features, 0, 1)
    return features
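
A short usage sketch: the path is a placeholder, load is librosa's loader, and process_audio is the function above.

from librosa import load

# Usage sketch -- 'speech.wav' is a hypothetical path.
audio, sr = load('speech.wav', sr=16000)
features = process_audio(audio, sr)
print(features.shape)  # (n_frames, 36): 12 MFCCs + 12 deltas + 12 delta-deltas
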
Example #19
    def test_mfcc(self):
        correct = rosaft.mfcc(y=self.sig,
                              sr=self.fs,
                              n_fft=nfft,
                              hop_length=stepsize)
        actual = mfcc(self.args)

        self.assertTrue(np.abs(correct - actual).max() < tol)
Example #20
def mel_to_mfcc(x):
    mfcc_wav = mfcc(S=power_to_db(x),
                    n_mfcc=13,
                    sr=sr,
                    n_fft=n_fft,
                    hop_length=hop_length)
    mfcc_wav = mfcc_wav.reshape(mfcc_wav.shape[0], mfcc_wav.shape[1], 1)
    return mfcc_wav
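
A sketch of driving this from a power mel spectrogram. The globals sr, n_fft, and hop_length are defined elsewhere in the source; the values below are illustrative stand-ins, and mfcc/power_to_db are assumed imported from librosa as in the snippet.

import numpy as np
from librosa.feature import melspectrogram

sr, n_fft, hop_length = 22050, 2048, 512  # illustrative stand-in globals

y = np.random.randn(sr)  # one second of noise as a stand-in signal
mel = melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length)
print(mel_to_mfcc(mel).shape)  # (13, n_frames, 1)
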
def create_ceps(fn):
    sample_rate, X = scipy.io.wavfile.read(fn)

    Y = X * 1.0

    # ceps, mspec, spec = mfcc(Y)  # three-value (talkbox-style) call
    ceps = mfcc(y=Y, sr=sample_rate)
    write_ceps(ceps, fn)
Example #22
def get_feature(fs, signal):
    """
    :param fs
    :param signal
    :return feature:mfcc
    """
    feature = mfcc(signal, fs, S=None, n_mfcc=20).T
    return feature
Example #23
def compute_MFCC(data, sample_rate, num_coefs=20):
    """
    small set of features (usually about 10–20) which concisely describe the overall shape of a spectral envelope
    :param data: np array audiodata
    :param sample_rate:
    :param num_coefs: number of features to generate, default 20
    :return: np array (2D) that contains the key features of an audio file
    """
    return feature.mfcc(data, sr=sample_rate, n_mfcc=num_coefs)
Example #25
def mfccCoefficients(sample):
    '''
    Determines the average value of each mfcc coefficient for each window.
    '''
    mels = np.mean(mfcc(y=np.array([float(e) for e in sample]),
                        sr=len(sample),  # treats each sample as one second long
                        n_mfcc=128).T,
                   axis=0)
    return mels
    def compute_mfccs(self, x):
        # 13 MFCCs plus first- and second-order deltas, stacked to 39 rows
        new_sample = x.astype(float)
        mfccs = mfcc(new_sample, 16000, n_mfcc=13, n_fft=640, hop_length=320)
        grad_mfccs = np.gradient(mfccs, axis=1)
        mfccs = np.concatenate((mfccs, grad_mfccs))
        mfccs = np.concatenate((mfccs, np.gradient(grad_mfccs, axis=1)))
        mfccs = torch.from_numpy(mfccs)
        mfccs = mfccs.type(torch.FloatTensor)
        return mfccs
Example #28
def extract_features(example_file):
    soundfile, samplerate = sf.read(example_file)
    return mfcc(y=soundfile,
                sr=samplerate,
                S=None,
                n_mfcc=13,
                dct_type=2,
                n_fft=1024,
                hop_length=64).T
    def frequency_feature(self):
        data = np.array(self.sum_all())
        data = np.transpose(data, (1, 0))
        data_mfccs = []
        for i in data:
            sig = i / max(abs(i))
            data_mfccs.append(mfcc(sig, sr=10, n_mfcc=2, hop_length=10))
        data_mfccs = np.array(data_mfccs)
        # data_mfccs = np.transpose(data_mfccs, (0, 2, 1))
        return data_mfccs
def extract_features(keystroke, sr=44100, n_mfcc=16, n_fft=441, hop_len=110):
    """Return an MFCC-based feature vector for a given keystroke."""
    spec = mfcc(
        y=keystroke.astype(float),
        sr=sr,
        n_mfcc=n_mfcc,
        n_fft=n_fft,  # 441 samples = a 10 ms window at 44.1 kHz
        hop_length=hop_len,  # 110 samples = a 2.5 ms hop
    )
    return spec.flatten()
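
A quick shape check with synthetic input; the 4410-sample burst below is a stand-in for a real keystroke recording, not data from the source.

import numpy as np

keystroke = np.random.randn(4410)  # ~100 ms of noise at 44.1 kHz
vec = extract_features(keystroke)
print(vec.shape)  # 16 MFCCs x n_frames, flattened into one vector
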
Example #31
def extract_mfcc(full_audio_path):
    wave, sample_rate = load(full_audio_path)
    n_fft = int(sample_rate * 0.03)
    hop_length = n_fft // 2
    mfcc_features = mfcc(wave,
                         sr=sample_rate,
                         n_mfcc=50,
                         hop_length=hop_length,
                         n_fft=n_fft).T
    return mfcc_features
Example #32
    def loadFile(self, fname):
        '''
        fname:      filename of the sound file we want to load
        '''
        if self.verbose: print('Loading %s' % fname)

        if self.cached:
            if not os.path.exists(fname + '-mfcc.npy'):
                y, sr = librosa.load(fname)
                data = mfcc(y=y, sr=sr).T
                np.save(fname + '-mfcc.npy', data)
            else:
                data = np.load(fname + '-mfcc.npy')
        else:
            y, sr = librosa.load(fname)
            # TODO: Add ability to filter by seconds/duration
            # seconds = y.size/sr
            data = mfcc(y=y, sr=sr).T

        return data
Example #33
def extract_features(data, n_fft=2048):
    res = []
    for row in data:
        ceps = mfcc(row, n_fft=n_fft, sr=22050)  # MFCCs, not a spectral centroid
        res.append([
            np.min(ceps),
            np.max(ceps),
            np.median(ceps),
        ])

    return np.array(res)
Example #34
def compute_spectral_signature(song_id, cached = True, use_covar = True):

	if cached and is_signature_cached(song_id):
		return fetch_signature(song_id)

	audioclip_path = join(AUDIOCLIPS_FOLDER, "{0}.mp3".format(song_id))
	waveform, sample_rate, frame_length, frames = None, None, None, None
	try:
		waveform, sample_rate = load(audioclip_path, sr=SAMPLE_RATE)
		frame_length = core.time_to_samples(np.arange(0, 2, FRAME_TIMESTEP), sr = sample_rate)[1]
		frames = librosa_util.frame(y = waveform, frame_length = frame_length, hop_length = frame_length)
	except Exception as e:
		logging.warning("Couldn't preprocess audioclip '{0}': {1}".format(audioclip_path, str(e)))
		return None

	# The 'frames' array has shape (<frame_length>, <number_of_frames>)
	# hence, we transpose it. This holds true for every call to the librosa library that returns an array.
	frames = frames.T

	spectrograms = []
	for frame in frames[FRAME_START: FRAME_START + FRAME_TOTAL]:
		spectrogram = feature.mfcc(y = frame, sr = sample_rate).T  # sr is the sampling rate, not the frame length
		to_add = [ entry[MFCSS_OFFSET : MFCSS_OFFSET+N_MFCCS] for entry in spectrogram ]
		spectrograms += to_add
	
	spectrograms = np.array(spectrograms)
	clusters = KMeans(n_clusters = CLUSTERS_PER_SIGNATURE)
	model = clusters.fit(spectrograms)

	# A song's "signature" is an array [ ( u_i, s_i, w_i ) ... ]. Where 0 <= i < CLUSTERS_PER_SIGNATURE
	# The triple (u_i, s_i, w_i) contains these variables:
	# 	u_i : Mean for Cluster i
	#	s_i : Covariance for Cluster i
	#	w_i : Weight for Cluster i
	
	signature = []
	for label in range(CLUSTERS_PER_SIGNATURE):
		indexes = [ index for index, element in enumerate(model.labels_) if element == label ]
		cluster_points = [ spectrograms[i] for i in indexes ]

		mean = model.cluster_centers_[label]
		covariance = np.cov(cluster_points, rowvar=False) if use_covar else []  # rows are observations
		weight = len(cluster_points)
		cluster_params = (mean, covariance, weight)

		signature.append(cluster_params)

	persist_signature(song_id, signature)
	
	return signature
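
A short invocation sketch matching the triple structure documented in the comments above; the song id is a placeholder.

# Usage sketch -- '000123' is a hypothetical song id.
signature = compute_spectral_signature('000123', cached=False, use_covar=True)
for mean, covariance, weight in signature:
    print(weight, mean.shape)
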
def mfcc(path):
    # Let's make and display a mel-scaled power (energy-squared) spectrogram
    # We use a small hop length of 64 here so that the
    # frames line up with the beat tracker example below.

    y, sr = load_files(path)

    print('calculating mfcc ' + path)
    S = feature.melspectrogram(y, sr=sr, n_fft=2048, hop_length=64, n_mels=128)
    
    # Convert to log scale (dB). We'll use the peak power as reference.
    log_S = power_to_db(S, ref=np.max)
    mfcc_v = feature.mfcc(S=log_S, n_mfcc=14)
    
    return np.mean(mfcc_v, axis=1)
Example #37
def runMFCC(signal, sample_rate=22050):
    return mfcc(y=np.asarray(signal), sr=sample_rate, n_mfcc=10)
Example #38


soundPath = '/Users/jiusi/Desktop/audioSamples/Rec_003.wav'
forrest = '/Users/jiusi/dares_g1.1/dares_g1/left/forrest_1.wav'
livingRoom = '/Users/jiusi/dares_g1.1/dares_g1/left/living_room_1.wav'
study = '/Users/jiusi/dares_g1.1/dares_g1/left/study_1.wav'
street = '/Users/jiusi/Desktop/busy_street_1.wav'
sub = '/Users/jiusi/Desktop/sub_0.m4a'

quiet_smarti = '/Users/jiusi/Desktop/quiet_smarti.wav'
quiet_iphone = '/Users/jiusi/Desktop/quiet_iphone.m4a'

rate, sig = ud.getDataFromPath(quiet_smarti)

mfccValue = mfcc(y=sig, sr=rate, n_mfcc=13)
delta_mfcc = librosa.feature.delta(mfccValue)
delta2_mfcc = librosa.feature.delta(mfccValue, order=2)

# plt.plot()
#
# fig = plt.figure()
# signalSub = fig.add_subplot()
# signalSub.plot(range(0, len(obs)), obs)

# mfccSub = fig.add_subplot()
# mfccSub.plot(range(0, len(mfccValue)), mfccValue)

#
# # How do they look?  We'll show each in its own subplot
# plt.figure(figsize=(12, 6))