def _extractFeatures_(self):
    """Compute MFCC, log filterbank and log spectrum features per recording.

    Iterates over the recordings held in ``self.data`` / ``self.rate``
    (indexed in parallel) and stores the result in ``self.features`` as a
    dict with the keys ``'mfcc'``, ``'filter'`` and ``'spectrum'``; each
    value is a list with one entry per recording.

    The filterbank and spectrum outputs of ``fbank`` are converted to
    decibels with ``10 * log10``. No mean/variance normalisation is applied.
    """
    print('Extraction des features ...')

    cepstra = []
    log_banks = []
    log_spectra = []

    for idx, sample_rate in enumerate(self.rate):
        signal = self.data[idx]

        cepstra.append(
            mfcc(signal, sample_rate, self.winlen, self.winstep, self.numcep,
                 self.nfilt, self.nfft, self.lowfreq, self.highfreq))

        # The energy returned by fbank is not used by this method.
        bank, _energy, spec = fbank(
            signal, sample_rate, self.winlen, self.winstep, self.nfilt,
            self.nfft, self.lowfreq, self.highfreq)

        # Convert both representations to decibels.
        log_banks.append(10 * np.log10(bank))
        log_spectra.append(10 * np.log10(spec))

    self.features = {
        'mfcc': cepstra,
        'filter': log_banks,
        'spectrum': log_spectra
    }
    print('Extraction des features terminée ...')
def comp_feat(self, sig, rate):
    '''
    compute the features from an edge-trimmed signal

    The signal is first passed through ``snip`` using the configured window
    length and step, then MFCCs are computed with ``base.mfcc``. Energy is
    appended as an extra column when ``conf['include_energy']`` is the
    string 'True', and ``conf['dynamic']`` selects the final transform.

    Args:
        sig: the audio signal as a 1-D numpy array
        rate: the sampling rate

    Returns:
        the features as a [seq_length x feature_dim] numpy array

    Raises:
        Exception: if conf['dynamic'] is not 'delta', 'ddelta' or 'nodelta'
    '''
    conf = self.conf

    # snip the edges
    trimmed = snip(sig, rate, float(conf['winlen']), float(conf['winstep']))

    feat, energy = base.mfcc(trimmed, rate, conf)

    # config values are strings, hence the comparison against 'True'
    if conf['include_energy'] == 'True':
        feat = np.concatenate((feat, energy[:, np.newaxis]), axis=1)

    dynamic = conf['dynamic']
    if dynamic == 'delta':
        return base.delta(feat)
    if dynamic == 'ddelta':
        return base.ddelta(feat)
    if dynamic == 'nodelta':
        return feat

    raise Exception('unknown dynamic type')
def comp_feat(self, sig, rate):
    '''
    compute the features

    MFCCs are computed with ``base.mfcc``. Energy is appended as an extra
    column when ``conf['include_energy']`` is the string 'True',
    ``conf['dynamic']`` selects the delta transform, and the result is
    optionally mean and variance normalized per feature dimension when
    ``conf['mvn']`` is the string 'True'.

    Args:
        sig: the audio signal as a 1-D numpy array
        rate: the sampling rate

    Returns:
        the features as a [seq_length x feature_dim] numpy array

    Raises:
        Exception: if conf['dynamic'] is not 'delta', 'ddelta' or 'nodelta'
    '''

    feat, energy = base.mfcc(sig, rate, self.conf)

    if self.conf['include_energy'] == 'True':
        feat = np.append(feat, energy[:, np.newaxis], 1)

    dynamic = self.conf['dynamic']
    if dynamic == 'delta':
        feat = base.delta(feat)
    elif dynamic == 'ddelta':
        feat = base.ddelta(feat)
    elif dynamic != 'nodelta':
        raise Exception('unknown dynamic type')

    # mean and variance normalize the features
    if self.conf['mvn'] == 'True':
        # A feature column can be constant (zero standard deviation); the
        # small offset keeps the division finite instead of producing
        # NaN/inf values.
        feat = (feat - feat.mean(0)) / (feat.std(0) + 1e-20)

    return feat
def extractFeatures(input_signal):
    """Extract a flat feature vector from the cleaned signal.

    The vector is every MFCC coefficient of every frame (flattened row by
    row) followed by the pitch estimate repeated 30 times.

    Only the MFCCs and the pitch contribute to the result. Earlier
    experiments (LPCC features, delta / delta-delta coefficients and
    cepstral mean normalisation) never reached the returned vector and are
    therefore no longer computed.

    :param input_signal: the cleaned signal (any sized sequence of samples)
    :return: the feature list; the unchanged ``input_signal`` if it is
        empty; an empty list if the MFCC computation produced NaN values
    """
    if len(input_signal) == 0:
        print("cleaned signal is empty")
        return input_signal

    mfcc_list = np.array(mfcc(input_signal,
                              samplerate=prm.params["sample_rate"].get(),
                              winlen=0.032,
                              winstep=0.016,
                              numcep=30,
                              nfilt=55,
                              nfft=2048,
                              lowfreq=0,
                              highfreq=6000,
                              preemph=0.95,
                              ceplifter=22,
                              appendEnergy=True))

    pitch = extract_pitch(input_signal)

    # Never hand NaN values to the caller.
    # TODO: find out why the MFCC computation sometimes yields NaN.
    if np.isnan(mfcc_list).any():
        print("Cell is nan (see feature extraction): nan")
        return []

    full_features_list = list(np.ravel(mfcc_list))
    # The pitch is appended 30 times.
    # TODO: is the repetition needed to ensure that the forest selects it?
    full_features_list.extend([pitch] * 30)
    return full_features_list
def mfcc_features(sig, samplerate=44100):
    """Return mean-normalised MFCC features of a signal.

    MFCCs are computed with 20 ms windows, a 10 ms step, 13 cepstral
    coefficients and 40 filters; the per-coefficient mean over all frames
    is then subtracted.

    :param sig: the audio signal
    :param samplerate: sampling rate of ``sig`` in Hz. Defaults to 44100,
        the value that used to be hard-coded.
    :return: the MFCC matrix with the mean over axis 0 removed
    """
    # NOTE(review): a 20 ms window at 44.1 kHz is 882 samples; confirm that
    # the FFT size used by base.mfcc is large enough for the whole frame.
    features = base.mfcc(sig, samplerate=samplerate, winlen=0.02,
                         winstep=0.01, numcep=13, nfilt=40)

    # Mean normalization for feature vectors.
    return features - np.mean(features, axis=0)
# Load every training file and collect, per file: the (sample rate, samples)
# pair, the class index, the magnitude spectrum and one MFCC vector.
liste_fe_echant = []
liste_classe = []
liste_dsp = []
matrice = []
for NomFichier in liste_nom_fichier_apprentissage:
    (Fe, Echantillons) = scipy.io.wavfile.read("Signaux/" + NomFichier)
    liste_fe_echant.append((Fe, Echantillons))

    # The class is identified by the first two characters of the file name.
    NumerClasse = Prefixe.index(NomFichier[0:2])
    liste_classe.append(NumerClasse)

    dsp = np.abs(np.fft.fft(Echantillons))
    liste_dsp.append(dsp)

    # Window length and step both equal the signal duration in seconds.
    # float() keeps this a true division on Python 2 as well.
    DureeSignal = len(Echantillons) / float(Fe)
    VecteurCoefficients = base.mfcc(Echantillons,
                                    samplerate=Fe,
                                    winlen=DureeSignal,
                                    winstep=DureeSignal,
                                    nfft=1024)
    # Only the first frame is kept as the file's coefficient vector.
    matrice.append(VecteurCoefficients[0])

# Display the time and frequency representations (3 random files).
# random.randint includes its upper bound, so randint(0, len(x)) could
# return len(x) and raise IndexError; randrange excludes the upper bound.
nb1 = random.randrange(len(liste_fe_echant))
nb2 = random.randrange(len(liste_fe_echant))
nb3 = random.randrange(len(liste_fe_echant))

plt.subplot(321)
plt.plot(liste_fe_echant[nb1][1], "r")
plt.title("Représentation temporelle de " + Prefixe[liste_classe[nb1]])
plt.subplot(322)
plt.plot(liste_dsp[nb1], "r")
plt.title("Représentation frequentielle de " + Prefixe[liste_classe[nb1]])
def comp_feat(self, sig, rate):
    '''
    compute the features (MFCC plus an interpolated pitch track)

    The pitch is obtained by writing the signal to a temporary wav file and
    running the external 'reaper' pitch extractor on it, so 'reaper' must be
    in your $PATH. Frames that reaper marks with -1 (voiceless) are filled
    by linear interpolation between the neighbouring voiced frames.

    Args:
        sig: the audio signal as a 1-D numpy array
        rate: the sampling rate

    Returns:
        the features as a [seq_length x feature_dim] numpy array

    Raises:
        Exception: if conf['dynamic'] is not 'delta', 'ddelta' or 'nodelta'
        subprocess.CalledProcessError: if reaper exits with an error
        OSError: if the reaper executable cannot be started
    '''
    # local imports keep this block self-contained
    import shutil
    import subprocess
    import tempfile

    feat, energy = base.mfcc(sig, rate, self.conf)

    # write the wav to a temporary location and invoke the external pitch
    # extractor. A private directory per call avoids clashes between
    # concurrent calls and is always removed afterwards.
    tempdir = tempfile.mkdtemp()
    try:
        wav_path = os.path.join(tempdir, 'mix.wav')
        f0_path = os.path.join(tempdir, 'mix.txt')
        wav.write(wav_path, rate, np.int16(sig))

        # An argument list (no shell) with a checked exit status: a failed
        # run raises here instead of surfacing later as a confusing parse
        # error. NOTE(review): '-a -e 0.01' presumably selects ASCII output
        # with a 10 ms frame interval - confirm against the reaper docs.
        subprocess.check_call(
            ['reaper', '-i', wav_path, '-f', f0_path, '-a', '-e', '0.01'])

        # ndmin=2 keeps the column indexing valid for a single-row output
        pitch = np.loadtxt(f0_path, skiprows=7, ndmin=2)[:, 2]
    finally:
        shutil.rmtree(tempdir, ignore_errors=True)

    # extend the pitch track with its last value if it is shorter than feat
    pitch = np.pad(pitch, (0, max(0, feat.shape[0] - pitch.shape[0])), 'edge')

    # linear interpolation in voiceless regions
    voiceless = np.where(pitch == -1)[0]
    if voiceless.size:
        # split the voiceless indices into runs of consecutive frames
        jump = np.where((voiceless[1:] - voiceless[:-1]) > 1)[0]
        segments = np.split(voiceless, jump + 1)
    else:
        # fully voiced utterance: nothing to interpolate (np.split would
        # return a single empty segment that cannot be indexed)
        segments = []

    for seg in segments:
        idx1 = seg[0] - 1
        idx2 = seg[-1] + 1
        val1 = -1
        val2 = -1
        if idx1 >= 0:
            val1 = pitch[idx1]
        if idx2 < pitch.size:
            val2 = pitch[idx2]
        if val1 == -1:
            # segment starts at utterance start
            val1 = val2
        if val2 == -1:
            # segment ends at utterance end
            val2 = val1
        if val1 == -1:
            # segment is the whole utterance => make up a value
            val1 = 150
            val2 = 150

        # interpolate
        pitch[seg] = (val2 - val1) * (np.array(seg) - idx1) / float(idx2 - idx1) + val1

    feat = np.append(feat, pitch[:feat.shape[0], np.newaxis], 1)

    if self.conf['include_energy'] == 'True':
        feat = np.append(feat, energy[:, np.newaxis], 1)

    if self.conf['dynamic'] == 'delta':
        feat = base.delta(feat)
    elif self.conf['dynamic'] == 'ddelta':
        feat = base.ddelta(feat)
    elif self.conf['dynamic'] != 'nodelta':
        raise Exception('unknown dynamic type')

    # mean and variance normalize the features
    if self.conf['mvn'] == 'True':
        # features could be constant, e.g. voiceless speech
        feat = (feat - feat.mean(0)) / (feat.std(0) + 1e-20)

    return feat
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#!/usr/bin/env python
'''
Example for sigproc.py

Reads "english.wav", computes its MFCCs, their deltas and the log
filterbank features, then prints rows 1 and 2 of the filterbank features.
'''
# pylint: skip-file
import scipy.io.wavfile as wav
from base import mfcc
from base import delta
from base import logfbank


def main():
    '''Run the feature extraction demo on "english.wav".'''
    sample_rate, signal = wav.read("english.wav")

    cepstra = mfcc(signal, sample_rate)
    # The deltas are computed for demonstration only; they are not printed.
    delta(cepstra, 2)

    filterbank = logfbank(signal, sample_rate)
    print(filterbank[1:3, :])


if __name__ == '__main__':
    main()
for f in batch1_fns:
    # Read one experiment and reduce each pair of channel signals with
    # max_sig (original note: "signals ... with the highest value per
    # channel").
    v1, v2, ev = read_ae_file2(f)
    sig = np.array([max_sig(v1[k], v2[k]) for k in range(len(v1))])

    # Convert every raw signal to its MFCC matrix ...
    holder = [
        base.mfcc(raw, samplerate=rate, winlen=window, winstep=ratio,
                  lowfreq=300000, highfreq=1800000)
        for raw in sig
    ]
    # ... and keep only the first frame of each as the feature vector.
    X = [frames[0] for frames in holder]

    # CLUSTER STATISTICS ROUTINE
    # holder arrays
    silh = np.array([])
    db_score = np.array([])

    # Cluster and get stat
    for i in range(min_cluster, max_cluster + 1):
        kmeans = KMeans(n_clusters=i, n_init=100, tol=1e-6).fit(X)