def extract_features_path(self, path_audio, static=True, plots=False, fmt="npy", kaldi_file=""):
    """Extract the representation learning features for every file in a directory.

    The directory entries are processed in sorted order. Each entry is handed to
    ``self.extract_features_file`` with ``fmt="npy"``; the per-file results are
    stacked and then converted to the requested output format.

    :param path_audio: directory with (.wav) audio files inside, sampled at 16 kHz
    :param static: whether to compute and return statistic functionals over the feature matrix, or return the feature matrix computed over frames
    :param plots: forwarded unchanged to ``extract_features_file`` for every file
    :param fmt: format to return the features (npy, txt, dataframe, csv, torch, kaldi)
    :param kaldi_file: file to store kaldi features, only valid when fmt=="kaldi"
    :returns: stacked numpy array (npy/txt), pandas DataFrame with an extra "id"
        column holding the file name of every row (dataframe/csv), torch tensor
        (torch), or None after the features were written to ``kaldi_file`` (kaldi).
    :raises ValueError: if ``fmt`` is not supported, if ``fmt=="kaldi"`` is combined
        with ``static=True``, or if ``path_audio`` has no entries.

    >>> replearning=RepLearning('CAE')
    >>> path_audio="../audios/"
    >>> features1=replearning.extract_features_path(path_audio, static=True, plots=False, fmt="npy")
    >>> features2=replearning.extract_features_path(path_audio, static=True, plots=False, fmt="csv")
    >>> features3=replearning.extract_features_path(path_audio, static=False, plots=True, fmt="torch")
    >>> replearning.extract_features_path(path_audio, static=False, plots=False, fmt="kaldi", kaldi_file="./test.ark")
    """
    # Reject bad requests before spending time on every file of the directory.
    if fmt not in ("npy", "txt", "dataframe", "csv", "torch", "kaldi"):
        raise ValueError(fmt+" is not supported")
    if fmt == "kaldi" and static:
        raise ValueError("Kaldi is only supported for dynamic features")

    hf = sorted(os.listdir(path_audio))
    if not hf:
        raise ValueError("no files found in " + path_audio)

    pbar = tqdm(hf)
    ids = []
    Features = []
    for name in pbar:
        pbar.set_description("Processing %s" % name)
        # os.path.join works with or without a trailing separator in path_audio
        audio_file = os.path.join(path_audio, name)
        feat = self.extract_features_file(audio_file, static=static, plots=plots, fmt="npy")
        Features.append(feat)
        # static: one row per file; dynamic: one id per row of the per-file matrix
        ids.append(name if static else np.repeat(name, feat.shape[0]))

    Features = np.vstack(Features)
    ids = np.hstack(ids)

    if fmt in ("npy", "txt"):
        return Features
    if fmt in ("dataframe", "csv"):
        head = self.head_st if static else self.head_dyn
        df = {k: Features[:, e] for e, k in enumerate(head)}
        df["id"] = ids
        return pd.DataFrame(df)
    if fmt == "torch":
        return torch.from_numpy(Features)

    # Only fmt == "kaldi" with dynamic features can reach this point.
    dictX = get_dict(Features, ids)
    save_dict_kaldimat(dictX, kaldi_file)
def extract_features_file(self, audio, static=True, plots=False, fmt="npy", kaldi_file=""):
    """Extract the prosody features from an audio file.

    Static features come from ``self.prosody_static(audio, plots)`` and dynamic
    features from ``self.prosody_dynamic(audio)``; the result is then converted
    to the requested output format.

    :param audio: .wav audio file.
    :param static: whether to compute and return statistic functionals over the feature matrix, or return the feature matrix computed over frames
    :param plots: forwarded to ``self.prosody_static``; not used for dynamic features
    :param fmt: format to return the features (npy, txt, dataframe, csv, torch, kaldi)
    :param kaldi_file: file to store kaldi features, only valid when fmt=="kaldi"
    :returns: features computed from the audio file (numpy array, pandas DataFrame
        or torch tensor), or None when the features were written to ``kaldi_file``.
    :raises ValueError: if ``fmt`` is not supported or if ``fmt=="kaldi"`` is
        combined with ``static=True``.

    >>> prosody=Prosody()
    >>> file_audio="../audios/001_ddk1_PCGITA.wav"
    >>> features1=prosody.extract_features_file(file_audio, static=True, plots=True, fmt="npy")
    >>> features2=prosody.extract_features_file(file_audio, static=True, plots=True, fmt="dataframe")
    >>> features3=prosody.extract_features_file(file_audio, static=False, plots=True, fmt="torch")
    >>> prosody.extract_features_file(file_audio, static=False, plots=False, fmt="kaldi", kaldi_file="./test")
    """
    # Validate the request first so a typo in fmt does not cost a full extraction.
    if fmt not in ("npy", "txt", "dataframe", "csv", "torch", "kaldi"):
        raise ValueError("format " + fmt + " is not supported")
    if static and fmt == "kaldi":
        raise ValueError("Kaldi is only supported for dynamic features")

    if static:
        features = self.prosody_static(audio, plots)
    else:
        features = self.prosody_dynamic(audio)

    if fmt in ("npy", "txt"):
        return features
    if fmt in ("dataframe", "csv"):
        if static:
            # one single-row column per static feature name
            df = {k: [features[e]] for e, k in enumerate(self.head_st)}
        else:
            # one column per dynamic feature, one row per frame
            df = {k: features[:, e] for e, k in enumerate(self.head_dyn)}
        return pd.DataFrame(df)
    if fmt == "torch":
        return torch.from_numpy(features)

    # Only fmt == "kaldi" with dynamic features can reach this point;
    # the matrix is stored under the file name (last path component).
    name_all = audio.split('/')
    dictX = {name_all[-1]: features}
    save_dict_kaldimat(dictX, kaldi_file)
def extract_features_file(self, audio, static=True, plots=False, fmt="npy", kaldi_file=""):
    """Extract the glottal features from an audio file.

    The signal is mean-removed and peak-normalised, split into frames of
    ``self.size_frame`` seconds with a hop of ``self.size_step`` seconds, and a
    set of glottal descriptors is computed for every frame that has at least 10
    non-zero F0 values. Frames below that threshold are dropped.

    :param audio: .wav audio file.
    :param static: whether to compute and return statistic functionals over the feature matrix, or return the feature matrix computed over frames
    :param plots: when true, ``self.plot_glottal`` is called for every analysed frame
    :param fmt: format to return the features (npy, txt, dataframe, csv, torch, kaldi)
    :param kaldi_file: file to store kaldi features, only valid when fmt=="kaldi"
    :returns: features computed from the audio file (numpy array, pandas DataFrame
        or torch tensor), or None when the features were written to ``kaldi_file``.
    :raises ValueError: if the file name does not contain ".wav"/".WAV", if ``fmt``
        is not supported, or if ``fmt=="kaldi"`` is combined with ``static=True``.

    >>> glottal=Glottal()
    >>> file_audio="../audios/001_a1_PCGITA.wav"
    >>> features1=glottal.extract_features_file(file_audio, static=True, plots=True, fmt="npy")
    >>> features2=glottal.extract_features_file(file_audio, static=True, plots=True, fmt="dataframe")
    >>> features3=glottal.extract_features_file(file_audio, static=False, plots=True, fmt="torch")
    >>> glottal.extract_features_file(file_audio, static=False, plots=False, fmt="kaldi", kaldi_file="./test.ark")
    """
    if audio.find('.wav') == -1 and audio.find('.WAV') == -1:
        raise ValueError(audio + " is not a valid wav file")
    # Reject unsupported requests before the (expensive) analysis; previously an
    # unknown fmt silently returned None after all the work was done.
    if fmt not in ("npy", "txt", "dataframe", "csv", "torch", "kaldi"):
        raise ValueError(fmt + " is not supported")
    if static and fmt == "kaldi":
        raise ValueError("Kaldi is only supported for dynamic features")

    fs, data_audio = read(audio)
    data_audio = data_audio - np.mean(data_audio)
    data_audio = data_audio / float(np.max(np.abs(data_audio)))
    size_frameS = self.size_frame * float(fs)
    size_stepS = self.size_step * float(fs)
    overlap = size_stepS / size_frameS
    nF = int((len(data_audio) / size_frameS / overlap)) - 1

    # F0 contour with a 10 ms hop, computed on the signal rescaled by 2**15.
    data_audiof = np.asarray(data_audio * (2**15), dtype=np.float32)
    f0 = pysptk.sptk.rapt(data_audiof, fs, int(0.01 * fs), min=20, max=500,
                          voice_bias=-0.2, otype='f0')
    # Frame length / hop expressed in F0 samples (10 ms each).
    sizef0 = int(self.size_frame / 0.01)
    stepf0 = int(self.size_step / 0.01)
    startf0 = 0
    stopf0 = sizef0

    avgGCIt = np.zeros(nF)
    varGCIt = np.zeros(nF)
    avgNAQt = np.zeros(nF)
    varNAQt = np.zeros(nF)
    avgQOQt = np.zeros(nF)
    varQOQt = np.zeros(nF)
    avgH1H2t = np.zeros(nF)
    varH1H2t = np.zeros(nF)
    avgHRFt = np.zeros(nF)
    varHRFt = np.zeros(nF)
    rmwin = []  # indices of frames without enough non-zero F0 values

    for l in range(nF):
        data_frame = data_audio[int(l * size_stepS):int(l * size_stepS + size_frameS)]
        f0_frame = f0[startf0:stopf0]
        # Advance the F0 window for the next frame whatever happens below.
        startf0 = startf0 + stepf0
        stopf0 = stopf0 + stepf0

        f0nzframe = f0_frame[f0_frame != 0]
        if len(f0nzframe) < 10:
            rmwin.append(l)
            continue

        GCI = SE_VQ_varF0(data_frame, fs, f0=f0_frame)
        g_iaif = IAIF(data_frame, fs, GCI)
        g_iaif = g_iaif - np.mean(g_iaif)
        g_iaif = g_iaif / max(abs(g_iaif))
        glottal = cumtrapz(g_iaif)
        glottal = glottal - np.mean(glottal)
        glottal = glottal / max(abs(glottal))

        # Spacing between consecutive GCI values, divided by the sampling rate.
        GCId = np.diff(GCI)
        avgGCIt[l] = np.mean(GCId / fs)
        varGCIt[l] = np.std(GCId / fs)

        NAQ, QOQ, T1, T2, H1H2, HRF = get_vq_params(glottal, g_iaif, fs, GCI)
        avgNAQt[l] = np.mean(NAQ)
        varNAQt[l] = np.std(NAQ)
        avgQOQt[l] = np.mean(QOQ)
        varQOQt[l] = np.std(QOQ)
        avgH1H2t[l] = np.mean(H1H2)
        varH1H2t[l] = np.std(H1H2)
        avgHRFt[l] = np.mean(HRF)
        varHRFt[l] = np.std(HRF)

        if plots:
            self.plot_glottal(data_frame, fs, GCI, g_iaif, glottal,
                              avgGCIt[l], varGCIt[l])

    # avgGCIt is only used for the optional plot above; it is not part of the
    # feature matrix, so it is not filtered here.
    if len(rmwin) > 0:
        varGCIt = np.delete(varGCIt, rmwin)
        avgNAQt = np.delete(avgNAQt, rmwin)
        varNAQt = np.delete(varNAQt, rmwin)
        avgQOQt = np.delete(avgQOQt, rmwin)
        varQOQt = np.delete(varQOQt, rmwin)
        avgH1H2t = np.delete(avgH1H2t, rmwin)
        varH1H2t = np.delete(varH1H2t, rmwin)
        avgHRFt = np.delete(avgHRFt, rmwin)
        varHRFt = np.delete(varHRFt, rmwin)

    # One row per kept frame, nine columns.
    feat = np.stack((varGCIt, avgNAQt, varNAQt, avgQOQt, varQOQt,
                     avgH1H2t, varH1H2t, avgHRFt, varHRFt), axis=1)

    if fmt in ("npy", "txt"):
        return dynamic2static(feat) if static else feat
    if fmt in ("dataframe", "csv"):
        if static:
            feat_st = dynamic2static(feat)
            head_st = [
                k + " " + h
                for k in ["global avg", "global std", "global skewness", "global kurtosis"]
                for h in self.head
            ]
            df = {k: [feat_st[e]] for e, k in enumerate(head_st)}
        else:
            df = {k: feat[:, e] for e, k in enumerate(self.head)}
        return pd.DataFrame(df)
    if fmt == "torch":
        return torch.from_numpy(dynamic2static(feat) if static else feat)

    # Only fmt == "kaldi" with dynamic features can reach this point.
    name_all = audio.split('/')
    dictX = {name_all[-1]: feat}
    save_dict_kaldimat(dictX, kaldi_file)
def extract_features_file(self, audio, static=True, plots=False, fmt="npy", kaldi_file=""):
    """Extract the phonation features from an audio file.

    The signal is mean-removed and peak-normalised, an F0 contour is obtained
    with the configured ``self.pitch_method`` ('praat' or 'rapt'), and seven
    descriptors are derived: the first and second differences of the non-zero
    F0 values, plus the outputs of ``jitter_env``, ``shimmer_env``, ``APQ``,
    ``PPQ`` and ``logEnergy`` (scaled by 10).

    :param audio: .wav audio file.
    :param static: whether to compute and return statistic functionals over the feature matrix, or return the feature matrix computed over frames
    :param plots: when true, ``self.plot_phon`` is called with the signal, F0 and log-energy
    :param fmt: format to return the features (npy, txt, dataframe, csv, torch, kaldi)
    :param kaldi_file: file to store kaldi features, only valid when fmt=="kaldi"
    :returns: features computed from the audio file (numpy array, pandas DataFrame
        or torch tensor), or None when the features were written to ``kaldi_file``.
    :raises ValueError: if ``fmt`` is not supported, if ``fmt=="kaldi"`` is combined
        with ``static=True``, or if ``self.pitch_method`` is neither 'praat' nor 'rapt'.

    >>> phonation=Phonation()
    >>> file_audio="../audios/001_a1_PCGITA.wav"
    >>> features1=phonation.extract_features_file(file_audio, static=True, plots=True, fmt="npy")
    >>> features2=phonation.extract_features_file(file_audio, static=True, plots=True, fmt="dataframe")
    >>> features3=phonation.extract_features_file(file_audio, static=False, plots=True, fmt="torch")
    >>> phonation.extract_features_file(file_audio, static=False, plots=False, fmt="kaldi", kaldi_file="./test")
    """
    # Validate the request before reading audio or creating temporary files.
    if fmt not in ("npy", "txt", "dataframe", "csv", "torch", "kaldi"):
        raise ValueError(fmt + " is not supported")
    if static and fmt == "kaldi":
        raise ValueError("Kaldi is only supported for dynamic features")

    fs, data_audio = read(audio)
    data_audio = data_audio - np.mean(data_audio)
    data_audio = data_audio / float(np.max(np.abs(data_audio)))
    size_frameS = self.size_frame * float(fs)
    size_stepS = self.size_step * float(fs)
    overlap = size_stepS / size_frameS

    if self.pitch_method == 'praat':
        name_audio = audio.split('/')
        # temp file names are derived from the audio file name minus its last 4 characters
        temp_uuid = 'phon' + name_audio[-1][0:-4]
        if not os.path.exists(self.PATH + '/../tempfiles/'):
            os.makedirs(self.PATH + '/../tempfiles/')
        temp_filename_vuv = self.PATH + '/../tempfiles/tempVUV' + temp_uuid + '.txt'
        temp_filename_f0 = self.PATH + '/../tempfiles/tempF0' + temp_uuid + '.txt'
        praat_functions.praat_vuv(audio, temp_filename_f0, temp_filename_vuv,
                                  time_stepF0=self.size_step,
                                  minf0=self.minf0, maxf0=self.maxf0)
        F0, _ = praat_functions.decodeF0(temp_filename_f0,
                                         len(data_audio) / float(fs),
                                         self.size_step)
        os.remove(temp_filename_vuv)
        os.remove(temp_filename_f0)
    elif self.pitch_method == 'rapt':
        data_audiof = np.asarray(data_audio * (2**15), dtype=np.float32)
        F0 = pysptk.sptk.rapt(data_audiof, fs, int(size_stepS),
                              min=self.minf0, max=self.maxf0,
                              voice_bias=self.voice_bias, otype='f0')
    else:
        # Previously this fell through and failed later with a NameError on F0.
        raise ValueError("pitch_method " + str(self.pitch_method) + " is not supported")

    F0nz = F0[F0 != 0]
    Jitter = jitter_env(F0nz, len(F0nz))
    nF = int((len(data_audio) / size_frameS / overlap)) - 1
    Amp = []
    logE = []
    apq = []
    ppq = []
    DF0 = np.diff(F0nz, 1)
    DDF0 = np.diff(DF0, 1)

    lnz = 0  # number of frames with non-zero F0 seen so far
    for l in range(nF):
        data_frame = data_audio[int(l * size_stepS):int(l * size_stepS + size_frameS)]
        energy = 10 * logEnergy(data_frame)
        if F0[l] != 0:
            Amp.append(np.max(np.abs(data_frame)))
            logE.append(energy)
            if lnz >= 12:
                # APQ over the 12 amplitudes preceding the current frame
                amp_arr = np.asarray(Amp[lnz - 12:lnz])
                apq.append(APQ(amp_arr))
            if lnz >= 6:
                # PPQ over the reciprocals of the 6 preceding non-zero F0 values
                f0arr = np.asarray(F0nz[lnz - 6:lnz])
                ppq.append(PPQ(1 / f0arr))
            lnz = lnz + 1

    Shimmer = shimmer_env(Amp, len(Amp))
    apq = np.asarray(apq)
    ppq = np.asarray(ppq)
    logE = np.asarray(logE)

    if len(apq) == 0:
        print(
            "warning, there is not enough long voiced segments to compute the APQ, in this case APQ=shimmer"
        )
        apq = Shimmer

    if plots:
        self.plot_phon(data_audio, fs, F0, logE)

    # The leading offsets trim every contour so the rows of feat_mat line up;
    # which set applies depends on whether APQ was replaced by Shimmer above.
    if len(Shimmer) == len(apq):
        feat_mat = np.vstack((DF0[5:], DDF0[4:], Jitter[6:], Shimmer[6:],
                              apq[6:], ppq, logE[6:])).T
    else:
        feat_mat = np.vstack((DF0[11:], DDF0[10:], Jitter[12:], Shimmer[12:],
                              apq, ppq[6:], logE[12:])).T

    feat_v = dynamic2statict([DF0, DDF0, Jitter, Shimmer, apq, ppq, logE])

    if fmt in ("npy", "txt"):
        return feat_v if static else feat_mat
    if fmt in ("dataframe", "csv"):
        if static:
            head_st = [
                k + " " + h
                for k in ["avg", "std", "skewness", "kurtosis"]
                for h in self.head
            ]
            df = {k: [feat_v[e]] for e, k in enumerate(head_st)}
        else:
            df = {k: feat_mat[:, e] for e, k in enumerate(self.head)}
        return pd.DataFrame(df)
    if fmt == "torch":
        return torch.from_numpy(feat_v if static else feat_mat)

    # Only fmt == "kaldi" with dynamic features can reach this point.
    name_all = audio.split('/')
    dictX = {name_all[-1]: feat_mat}
    save_dict_kaldimat(dictX, kaldi_file)
def extract_features_file(self, audio, static=True, plots=False, fmt="npy", kaldi_file=""):
    """Extract the phonological features from an audio file.

    The per-frame table comes from ``self.phon.get_PLLR(audio, plot_flag=plots)``.
    Its "time" column is excluded from every numeric output. In static mode
    each remaining column is summarised with mean, std, skewness, kurtosis, max
    and min, paired by position with the names in ``self.statistics``.

    :param audio: .wav audio file.
    :param static: whether to compute and return statistic functionals over the feature matrix, or return the feature matrix computed over frames
    :param plots: passed to ``get_PLLR`` as ``plot_flag``
    :param fmt: format to return the features (npy, txt, dataframe, csv, torch, kaldi)
    :param kaldi_file: file to store kaldi features, only valid when fmt=="kaldi"
    :returns: features computed from the audio file (numpy array, pandas DataFrame
        or torch tensor), or None when the features were written to ``kaldi_file``.
        With ``static=False`` and a dataframe format, the table returned by
        ``get_PLLR`` is handed back as is (including its "time" column).
    :raises ValueError: if ``fmt`` is not supported or if ``fmt=="kaldi"`` is
        combined with ``static=True``.

    >>> phonological=Phonological()
    >>> file_audio="../audios/001_ddk1_PCGITA.wav"
    >>> features1=phonological.extract_features_file(file_audio, static=True, plots=True, fmt="npy")
    >>> features2=phonological.extract_features_file(file_audio, static=True, plots=True, fmt="dataframe")
    >>> features3=phonological.extract_features_file(file_audio, static=False, plots=True, fmt="torch")
    >>> phonological.extract_features_file(file_audio, static=False, plots=False, fmt="kaldi", kaldi_file="./test")
    """
    # Validate the request before running the model (and any plotting).
    if fmt not in ("npy", "txt", "dataframe", "csv", "torch", "kaldi"):
        raise ValueError(fmt + " is not supported")
    if static and fmt == "kaldi":
        raise ValueError("Kaldi is only supported for dynamic features")

    df = self.phon.get_PLLR(audio, plot_flag=plots)
    keys = df.keys().tolist()
    keys.remove('time')

    if static:
        functions = [np.mean, np.std, st.skew, st.kurtosis, np.max, np.min]
        if fmt in ("dataframe", "csv"):
            # one single-row column per (feature, statistic) pair
            dff = {
                j + "_" + l: [function(df[j])]
                for j in keys
                for l, function in zip(self.statistics, functions)
            }
            return pd.DataFrame(dff)
        # zip() keeps the number of functionals tied to len(self.statistics)
        feat_vec = np.hstack([
            function(df[j])
            for j in keys
            for _, function in zip(self.statistics, functions)
        ])
        if fmt == "torch":
            return torch.from_numpy(feat_vec)
        return feat_vec

    if fmt in ("dataframe", "csv"):
        return df

    # frames along axis 0, one column per feature (without "time")
    featmat = np.stack([df[k] for k in keys], axis=1)
    if fmt in ("npy", "txt"):
        return featmat
    if fmt == "torch":
        return torch.from_numpy(featmat)

    # Only fmt == "kaldi" with dynamic features can reach this point.
    name_all = audio.split('/')
    dictX = {name_all[-1]: featmat}
    save_dict_kaldimat(dictX, kaldi_file)
def extract_features_file(self, audio, static=True, plots=False, fmt="npy", kaldi_file=""):
    """Extract the representation learning features from an audio file.

    Two matrices are obtained from ``self.AEspeech``: the bottleneck features
    and the reconstruction-error features. In dynamic mode they are concatenated
    column-wise. In static mode mean, std, skewness and kurtosis are taken along
    axis 0 of each matrix and concatenated (bottleneck statistics first).

    :param audio: .wav audio file.
    :param static: whether to compute and return statistic functionals over the feature matrix, or return the feature matrix computed over frames
    :param plots: when true, ``self.AEspeech.plot_spectrograms(audio)`` is called
    :param fmt: format to return the features (npy, txt, dataframe, csv, torch, kaldi)
    :param kaldi_file: file to store kaldi features, only valid when fmt=="kaldi"
    :returns: features computed from the audio file (numpy array, pandas DataFrame
        or torch tensor), or None when the features were written to ``kaldi_file``.
    :raises ValueError: if ``fmt`` is not supported or if ``fmt=="kaldi"`` is
        combined with ``static=True``.

    >>> replearning=RepLearning('CAE')
    >>> file_audio="../audios/001_ddk1_PCGITA.wav"
    >>> features1=replearning.extract_features_file(file_audio, static=True, plots=True, fmt="npy")
    >>> features2=replearning.extract_features_file(file_audio, static=True, plots=True, fmt="dataframe")
    >>> features3=replearning.extract_features_file(file_audio, static=False, plots=True, fmt="torch")
    >>> replearning.extract_features_file(file_audio, static=False, plots=False, fmt="kaldi", kaldi_file="./test")
    """
    # Validate the request before running the autoencoder (and any plotting).
    if fmt not in ("npy", "txt", "dataframe", "csv", "torch", "kaldi"):
        raise ValueError(fmt+" is not supported")
    if static and fmt == "kaldi":
        raise ValueError("Kaldi is only supported for dynamic features")

    hb = self.AEspeech.compute_bottleneck_features(audio)
    err = self.AEspeech.compute_rec_error_features(audio)
    if plots:
        self.AEspeech.plot_spectrograms(audio)

    if static:
        # Order: [mean, std, skew, kurtosis] of hb, then the same four of err.
        functionals = (np.mean, np.std, st.skew, st.kurtosis)
        feat_vec = np.hstack([stat(mat, 0) for mat in (hb, err) for stat in functionals])
        if fmt in ("npy", "txt"):
            return feat_vec
        if fmt in ("dataframe", "csv"):
            dff = {key: [value] for (key, value) in zip(self.head_st, feat_vec)}
            return pd.DataFrame(dff)
        return torch.from_numpy(feat_vec)

    featmat = np.concatenate((hb, err), axis=1)
    if fmt in ("npy", "txt"):
        return featmat
    if fmt in ("dataframe", "csv"):
        dff = {key: featmat[:, e] for e, key in enumerate(self.head_dyn)}
        return pd.DataFrame(dff)
    if fmt == "torch":
        return torch.from_numpy(featmat)

    # Only fmt == "kaldi" with dynamic features can reach this point.
    name_all = audio.split('/')
    dictX = {name_all[-1]: featmat}
    save_dict_kaldimat(dictX, kaldi_file)
def extract_features_file(self, audio, static=True, plots=False, fmt="npy", kaldi_file=""):
    """Extract the articulation features from an audio file.

    The signal is mean-removed and peak-normalised. An F0 contour and the
    onset/offset segments are obtained with ``self.pitch_method`` ('praat' or
    'rapt'); ``extractTrans`` yields the BBE and MFCC matrices for both segment
    sets, and their first and second differences are appended. The first two
    formants are read through Praat and set to zero over every run of
    zero-F0 frames that is at least ``self.len_thr_miliseconds / self.step``
    frames long.

    The dynamic feature matrix is built from the onset descriptors only; the
    static vector is computed by ``dynamic2statict_artic`` from the onset,
    offset and formant descriptors.

    :param audio: .wav audio file.
    :param static: whether to compute and return statistic functionals over the feature matrix, or return the feature matrix computed over frames
    :param plots: when true, ``self.plot_art`` is called with the signal, F0, formants and segments
    :param fmt: format to return the features (npy, txt, dataframe, csv, torch, kaldi)
    :param kaldi_file: file to store kaldi features, only valid when fmt=="kaldi"
    :returns: features computed from the audio file (numpy array, pandas DataFrame
        or torch tensor), or None when the features were written to ``kaldi_file``.
    :raises ValueError: if ``fmt`` is not supported, if ``fmt=="kaldi"`` is combined
        with ``static=True``, or if ``self.pitch_method`` is neither 'praat' nor 'rapt'.

    >>> articulation=Articulation()
    >>> file_audio="../audios/001_ddk1_PCGITA.wav"
    >>> features1=articulation.extract_features_file(file_audio, static=True, plots=True, fmt="npy")
    >>> features2=articulation.extract_features_file(file_audio, static=True, plots=True, fmt="dataframe")
    >>> features3=articulation.extract_features_file(file_audio, static=False, plots=True, fmt="torch")
    >>> articulation.extract_features_file(file_audio, static=False, plots=False, fmt="kaldi", kaldi_file="./test")
    """
    # Validate the request before reading audio or creating temporary files;
    # previously an unknown fmt silently returned None after all the work.
    if fmt not in ("npy", "txt", "dataframe", "csv", "torch", "kaldi"):
        raise ValueError(fmt + " is not supported")
    if static and fmt == "kaldi":
        raise ValueError("Kaldi is only supported for dynamic features")

    fs, data_audio = read(audio)
    data_audio = data_audio - np.mean(data_audio)
    data_audio = data_audio / float(np.max(np.abs(data_audio)))
    size_frameS = self.sizeframe * float(fs)
    size_stepS = self.step * float(fs)

    if self.pitch_method == 'praat':
        name_audio = audio.split('/')
        # temp file names are derived from the audio file name minus its last 4 characters
        temp_uuid = 'articulation' + name_audio[-1][0:-4]
        if not os.path.exists(self.PATH + '/../tempfiles/'):
            os.makedirs(self.PATH + '/../tempfiles/')
        temp_filename_vuv = self.PATH + '/../tempfiles/tempVUV' + temp_uuid + '.txt'
        temp_filename_f0 = self.PATH + '/../tempfiles/tempF0' + temp_uuid + '.txt'
        praat_functions.praat_vuv(audio, temp_filename_f0, temp_filename_vuv,
                                  time_stepF0=self.step,
                                  minf0=self.minf0, maxf0=self.maxf0)
        F0, _ = praat_functions.decodeF0(temp_filename_f0,
                                         len(data_audio) / float(fs), self.step)
        # the first returned element (full segments) is not used here
        _, segmentsOn, segmentsOff = praat_functions.read_textgrid_trans(
            temp_filename_vuv, data_audio, fs, self.sizeframe)
        os.remove(temp_filename_vuv)
        os.remove(temp_filename_f0)
    elif self.pitch_method == 'rapt':
        data_audiof = np.asarray(data_audio * (2**15), dtype=np.float32)
        F0 = pysptk.sptk.rapt(data_audiof, fs, int(size_stepS),
                              min=self.minf0, max=self.maxf0,
                              voice_bias=self.voice_bias, otype='f0')
        segmentsOn = V_UV(F0, data_audio, fs, 'onset')
        segmentsOff = V_UV(F0, data_audio, fs, 'offset')
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError("pitch_method " + str(self.pitch_method) + " is not supported")

    BBEon, MFCCon = extractTrans(segmentsOn, fs, size_frameS, size_stepS,
                                 self.nB, self.nMFCC)
    BBEoff, MFCCoff = extractTrans(segmentsOff, fs, size_frameS, size_stepS,
                                   self.nB, self.nMFCC)

    # First and second differences of every MFCC column.
    DMFCCon = np.asarray(
        [np.diff(MFCCon[:, nf], n=1) for nf in range(MFCCon.shape[1])]).T
    DDMFCCon = np.asarray(
        [np.diff(MFCCon[:, nf], n=2) for nf in range(MFCCon.shape[1])]).T
    DMFCCoff = np.asarray(
        [np.diff(MFCCoff[:, nf], n=1) for nf in range(MFCCoff.shape[1])]).T
    DDMFCCoff = np.asarray(
        [np.diff(MFCCoff[:, nf], n=2) for nf in range(MFCCoff.shape[1])]).T

    name_audio = audio.split('/')
    temp_uuid = 'artic' + name_audio[-1][0:-4]
    if not os.path.exists(self.PATH + '/../tempfiles/'):
        os.makedirs(self.PATH + '/../tempfiles/')
    temp_filename = self.PATH + '/../tempfiles/tempFormants' + temp_uuid + '.txt'
    praat_functions.praat_formants(audio, temp_filename, self.sizeframe, self.step)
    [F1, F2] = praat_functions.decodeFormants(temp_filename)
    os.remove(temp_filename)

    if len(F0) < len(F1):
        # F0 is zero-padded to the formant length; formant descriptors are left empty.
        F0 = np.hstack((F0, np.zeros(len(F1) - len(F0))))
        F1nz = np.zeros((0, 1))
        F2nz = np.zeros((0, 1))
        DF1 = np.zeros((0, 1))
        DDF1 = np.zeros((0, 1))
        DF2 = np.zeros((0, 1))
        DDF2 = np.zeros((0, 1))
    else:
        F1 = np.hstack((F1, np.zeros(len(F0) - len(F1))))
        F2 = np.hstack((F2, np.zeros(len(F0) - len(F2))))

        # Split the indices of zero-F0 frames into runs of consecutive frames.
        pos0 = np.where(F0 == 0)[0]
        dpos0 = np.hstack(([1], np.diff(pos0)))
        f0u = np.split(pos0, np.where(dpos0 > 1)[0])

        thr_sil = int(self.len_thr_miliseconds / self.step)
        for segment in f0u:
            # zero the formants over every sufficiently long zero-F0 run
            if len(segment) >= thr_sil:
                F1[segment] = 0
                F2[segment] = 0

        F1nz = F1[F1 != 0]
        F2nz = F2[F2 != 0]
        DF1 = np.diff(F1, n=1)
        DF2 = np.diff(F2, n=1)
        DDF1 = np.diff(F1, n=2)
        DDF2 = np.diff(F2, n=2)

    # NOTE(review): the original nesting of this plot call could not be recovered
    # from the whitespace-collapsed source; it is placed at function level because
    # F0 is padded in the first branch above only for this use. Confirm.
    if plots:
        self.plot_art(data_audio, fs, F0, F1, F2, segmentsOn, segmentsOff)

    # Normalise every empty formant descriptor to the same (0, 1) shape.
    if len(F1nz) == 0:
        F1nz = np.zeros((0, 1))
    if len(F2nz) == 0:
        F2nz = np.zeros((0, 1))
    if len(DF1) == 0:
        DF1 = np.zeros((0, 1))
    if len(DDF1) == 0:
        DDF1 = np.zeros((0, 1))
    if len(DF2) == 0:
        DF2 = np.zeros((0, 1))
    if len(DDF2) == 0:
        DDF2 = np.zeros((0, 1))

    feat_v = dynamic2statict_artic([
        BBEon, MFCCon, DMFCCon, DDMFCCon, BBEoff, MFCCoff, DMFCCoff,
        DDMFCCoff, F1nz, DF1, DDF1, F2nz, DF2, DDF2
    ])
    # Leading rows are dropped so every block has as many rows as the
    # second-difference matrix.
    feat_mat = np.hstack(
        (BBEon[2:, :], MFCCon[2:, :], DMFCCon[1:, :], DDMFCCon))

    if fmt in ("npy", "txt"):
        return feat_v if static else feat_mat
    if fmt in ("dataframe", "csv"):
        if static:
            head_st = [
                k + " " + h
                for k in ["avg", "std", "skewness", "kurtosis"]
                for h in self.head
            ]
            df = {k: [feat_v[e]] for e, k in enumerate(head_st)}
        else:
            df = {k: feat_mat[:, e] for e, k in enumerate(self.head_dyn)}
        return pd.DataFrame(df)
    if fmt == "torch":
        return torch.from_numpy(feat_v if static else feat_mat)

    # Only fmt == "kaldi" with dynamic features can reach this point.
    name_all = audio.split('/')
    dictX = {name_all[-1]: feat_mat}
    save_dict_kaldimat(dictX, kaldi_file)