def pysptk_featurize(audiofile):
    """Extract pitch and mel-cepstrum summary features from a WAV file.

    Parameters
    ----------
    audiofile : str
        Path of a WAV file readable by ``wavfile.read``.
        Assumes a mono recording (1-D sample array) -- TODO confirm with callers.

    Returns
    -------
    features : list
        Concatenation of ``stats(...)`` for the SWIPE F0 track, the RAPT F0
        track and the cepstral coefficients of one analysis frame.
    labels : list
        Labels built by ``stats_labels`` for the three feature groups, in the
        same order.
    """
    frame_length = 1024

    labels = []
    features = []
    fs, x = wavfile.read(audiofile)

    # Fundamental frequency estimated with SWIPE' (expects float64 input).
    f0_swipe = pysptk.swipe(x.astype(np.float64), fs=fs, hopsize=80,
                            min=60, max=200, otype="f0")
    features = features + stats(f0_swipe)
    labels = stats_labels('f0_swipe', labels)

    # Fundamental frequency estimated with RAPT (expects float32 input).
    f0_rapt = pysptk.rapt(x.astype(np.float32), fs=fs, hopsize=80,
                          min=60, max=200, otype="f0")
    features = features + stats(f0_rapt)
    labels = stats_labels('f0_rapt', labels)

    # The cepstral analysis previously used a name `xw` that was never
    # assigned in this function, which raised NameError. Build the windowed
    # frame from the signal that was just read instead.
    # NOTE(review): the frame position (centre of the recording) and length
    # are assumptions -- confirm against the intended feature definition.
    start = max(0, (len(x) - frame_length) // 2)
    frame = x[start:start + frame_length].astype(np.float64)
    if len(frame) < frame_length:
        # Zero-pad recordings shorter than one analysis frame.
        frame = np.pad(frame, (0, frame_length - len(frame)), mode="constant")
    xw = frame * pysptk.blackman(frame_length)

    mgc = pysptk.mgcep(xw, 20, 0.0, 0.0)
    features = features + stats(mgc)
    labels = stats_labels('mel-spectrum envelope', labels)

    return features, labels
def readmgc(filename):
    """Return per-frame mel-generalized cepstra of a WAV file at 44100 Hz.

    Side effect: when ``wavfile.read`` raises ``ValueError`` or reports a
    rate other than 44100, the file at ``filename`` is overwritten in place
    with the samples returned by ``soundfile.read``, written as 16-bit PCM
    with a declared rate of 44100.
    NOTE(review): no resampling call is visible here, so this looks like a
    relabelling of the rate rather than a resample -- confirm it is intended.

    Parameters
    ----------
    filename : str
        Path of the WAV file to analyse (may be rewritten, see above).

    Returns
    -------
    numpy.ndarray
        Array reshaped to ``(-1, order + 1)`` with ``order = 25``.

    Raises
    ------
    AssertionError
        If the rate read back is not 44100 or a frame has the wrong length.
    """
    # Analysis settings; all of them can be tuned.
    frame_length = 1024
    hop_length = 256
    order = 25  # order of the mel-cepstrum
    alpha = 0.41
    stage = 5
    gamma = -1.0 / stage

    def _rewrite_in_place():
        # Re-encode the file as PCM_16 tagged with a 44100 Hz rate.
        data, samplerate = soundfile.read(filename)
        soundfile.write(filename, data, 44100, subtype='PCM_16')

    try:
        rate, _ = wavfile.read(filename)
        if rate != 44100:
            _rewrite_in_place()
    except ValueError:
        _rewrite_in_place()

    sr, x = wavfile.read(filename)
    assert sr == 44100
    signal = x.astype(np.float64)

    # Slice into overlapping frames (one frame per row) and apply the window.
    window = pysptk.blackman(frame_length)
    frames = librosa.util.frame(
        signal, frame_length=frame_length, hop_length=hop_length
    ).astype(np.float64).T
    frames *= window
    assert frames.shape[1] == frame_length

    mgc = pysptk.mgcep(frames, order, alpha, gamma).reshape(-1, order + 1)
    print("mgc of {} is ok!".format(filename))
    return mgc
def extract_mgc(amp_sp: np.array, fs: int = None, num_coded_sps: int = 60, mgc_alpha: float = None) -> np.array:
    """Compute MGC coefficients from an amplitude spectrum via pysptk.

    :param amp_sp:         Amplitude spectrum handed to ``pysptk.mgcep``.
    :param fs:             Sampling rate; only used to derive the alpha value
                           when ``mgc_alpha`` is not given.
    :param num_coded_sps:  Number of output coefficients; the analysis order
                           is ``num_coded_sps - 1``.
    :param mgc_alpha:      Alpha passed to ``pysptk.mgcep``. When None it is
                           obtained from ``AudioProcessing.fs_to_mgc_alpha(fs)``.
    :return:               Result of ``pysptk.mgcep`` as float32 (not copied
                           if it already has that dtype).
    """
    alpha = mgc_alpha
    if alpha is None:
        assert fs is not None, "Either sampling rate or mgc alpha has to be given."
        alpha = AudioProcessing.fs_to_mgc_alpha(fs)

    analysis_options = dict(
        order=num_coded_sps - 1,
        alpha=alpha,
        gamma=AudioProcessing.mgc_gamma,
        eps=1.0e-8,
        min_det=0.0,
        etype=1,
        itype=3,
    )
    coefficients = pysptk.mgcep(amp_sp, **analysis_options)
    return coefficients.astype(np.float32, copy=False)
def __test_synthesis(filt):
    """Check that synthesis through ``filt`` yields only finite samples.

    Filter coefficients are derived from a dummy excitation signal, then the
    signal is synthesized twice: once with a default ``Synthesizer`` and once
    with ``transpose=True``.

    ``filt`` must expose ``stage``, ``order`` and ``alpha``.
    """
    hop = 80

    # Dummy source excitation.
    excitation = __dummy_source()

    # Dummy filter coefficients estimated from the windowed excitation.
    frames = __dummy_windowed_frames(excitation, frame_len=512, hopsize=hop)
    gamma = -1.0 / filt.stage
    cepstrum = pysptk.mgcep(frames, filt.order, filt.alpha, gamma)
    coefs = pysptk.mgc2b(cepstrum, filt.alpha, gamma)

    # Default construction first, then the transposed variant.
    for extra_kwargs in ({}, {"transpose": True}):
        synthesizer = Synthesizer(filt, hop, **extra_kwargs)
        y = synthesizer.synthesis(excitation, coefs)
        assert np.all(np.isfinite(y))
def readmgc(filename):
    """Return per-frame mel-generalized cepstra of a 16 kHz WAV file.

    Parameters
    ----------
    filename : str
        Path of the WAV file to analyse.

    Returns
    -------
    numpy.ndarray
        Array reshaped to ``(-1, order + 1)`` with ``order = 25``.

    Raises
    ------
    AssertionError
        If the sampling rate is not 16000 or a frame has the wrong length.
    """
    # Analysis settings; all of them can be tuned.
    frame_length = 1024
    hop_length = 256
    order = 25  # order of the mel-cepstrum
    alpha = 0.41
    stage = 5
    gamma = -1.0 / stage

    sr, samples = wavfile.read(filename)
    assert sr == 16000  # required sampling frequency
    signal = samples.astype(np.float64)

    # Slice into overlapping frames (one frame per row) and apply the window.
    window = pysptk.blackman(frame_length)
    frames = librosa.util.frame(
        signal, frame_length=frame_length, hop_length=hop_length
    ).astype(np.float64).T
    frames *= window
    assert frames.shape[1] == frame_length

    mgc = pysptk.mgcep(frames, order, alpha, gamma).reshape(-1, order + 1)
    print("mgc of {} is ok!".format(filename))
    return mgc
def test_mgcep_failure():
    """Call ``pysptk.mgcep`` with default options on 256 ones.

    Nothing is asserted inside this body.
    NOTE(review): the name suggests the call is expected to fail; any
    decorator that checks for the failure is outside this block -- confirm.
    """
    constant_frame = np.ones(256)
    pysptk.mgcep(constant_frame)
def test_mgcep_failure():
    """``pysptk.mgcep`` must raise ``RuntimeError`` for a frame of 256 ones."""
    constant_frame = np.ones(256)
    with pytest.raises(RuntimeError):
        pysptk.mgcep(constant_frame)
def __test_min_det(min_det):
    """Run ``pysptk.mgcep`` on ``x`` with the given ``min_det``.

    ``x`` is not defined in this block; it is resolved from the enclosing
    scope. The result is discarded, so this only exercises the call.
    """
    options = {"min_det": min_det}
    pysptk.mgcep(x, **options)
def __test_itype(itype=0):
    """Run ``pysptk.mgcep`` on ``x`` with the given ``itype`` (default 0).

    ``x`` is not defined in this block; it is resolved from the enclosing
    scope. The result is discarded, so this only exercises the call.
    """
    options = {"itype": itype}
    pysptk.mgcep(x, **options)
def __test_gamma(gamma):
    """Run ``pysptk.mgcep`` on ``x`` with the given ``gamma``.

    ``x`` is not defined in this block; it is resolved from the enclosing
    scope. The result is discarded, so this only exercises the call.
    """
    options = {"gamma": gamma}
    pysptk.mgcep(x, **options)
def __test(order, alpha, gamma):
    """Assert that ``pysptk.mgcep`` returns only finite values for ``x``.

    ``x`` is not defined in this block; it is resolved from the enclosing
    scope. ``order``, ``alpha`` and ``gamma`` are forwarded positionally.
    """
    coefficients = pysptk.mgcep(x, order, alpha, gamma)
    all_finite = np.all(np.isfinite(coefficients))
    assert all_finite
# Clean and gen should have the same length, and be 1D dis = stoi.stoi(gtaud, degaud, sr, extended=False) st.append(dis) #Compute MCD num=np.int(np.shape(degaud)[0]/1024)-5 #print(np.shape(gtaud), np.shape(degaud), sr, num) costFn=logSpecDbDist frame=1 nat=[] for ij in range(1,num): x=gtaud[1024*ij:1024*(ij+1)] #print(np.shape(x),x) try: mgc_temp = pysptk.mgcep(x) except: print("Error at: ", ij) else: nat.append(mgc_temp) if 1: #print(np.shape(mgc_temp)) #degaud=np.reshape(degaud[0:shp],(shp,1)) synth=[] for kidx in range(1,num): try: x=degaud[1024*kidx:1024*(kidx+1)] #print(np.shape(x),x) mgc_temp = pysptk.mgcep(x) except:
def mel_generalized_cepstrum(frames):
    """Return ``ps.mgcep`` of ``frames`` using the module-level settings.

    The analysis order, alpha and gamma come from the ``ORDER``, ``ALPHA``
    and ``GAMMA`` names defined outside this function.
    """
    return ps.mgcep(frames, ORDER, ALPHA, GAMMA)
def __test_eps(etype=0, eps=0.0):
    """Run ``pysptk.mgcep`` on ``x`` with the given ``etype`` and ``eps``.

    ``x`` is not defined in this block; it is resolved from the enclosing
    scope. The result is discarded, so this only exercises the call.
    """
    options = {"etype": etype, "eps": eps}
    pysptk.mgcep(x, **options)
def __test_otype(otype=0):
    """Run ``pysptk.mgcep`` on ``x`` with the given ``otype`` (default 0).

    ``x`` is not defined in this block; it is resolved from the enclosing
    scope. The result is discarded, so this only exercises the call.
    """
    options = {"otype": otype}
    pysptk.mgcep(x, **options)
def test_mgcep(order, alpha, gamma):
    """Assert ``pysptk.mgcep`` output is finite for the given parameters.

    The input comes from ``windowed_dummy_data(1024)``; ``order``, ``alpha``
    and ``gamma`` are forwarded positionally to ``pysptk.mgcep``.
    """
    frame = windowed_dummy_data(1024)
    coefficients = pysptk.mgcep(frame, order, alpha, gamma)
    all_finite = np.all(np.isfinite(coefficients))
    assert all_finite