def process(
    self, progressTracker: Optional[DefaultProgressTracker] = None
) -> Tuple[TimeValue]:
    """Compute a spectral-discontinuity track from the wave in ``self.data``.

    A spectrogram is computed with the frame rate set equal to the frame
    size, delta (regression) features of order
    ``self.parameters["delta_order"]`` are derived from it, and the RMS
    (over the second axis) of the difference between consecutive delta
    rows is returned as a ``TimeValue``.

    Parameters read from ``self.parameters``: ``frame_size``, ``NFFT``,
    ``normalized`` and ``delta_order`` (must be > 0).

    Args:
        progressTracker: optional tracker that replaces
            ``self.progressTracker`` before processing starts.

    Returns:
        A 1-tuple holding the discontinuity track. Its times are the
        midpoints between consecutive spectrogram frame times, so it has
        one sample fewer than the spectrogram has rows.
    """
    if progressTracker is not None:
        self.progressTracker = progressTracker
    wav = self.data.wave
    assert isinstance(wav, Wave)
    self.progressTracker.update(10)

    ftr, time, _ = dsp.spectrogram(
        wav,
        self.parameters["frame_size"],
        self.parameters["frame_size"],  # frame_rate = frame_size
        NFFT=self.parameters["NFFT"],
        normalized=self.parameters["normalized"],
    )
    if self.parameters["normalized"]:
        # Remove each row's own mean.
        ftr = ftr - np.mean(ftr, axis=1).reshape(-1, 1)

    # The discontinuity is defined between neighbouring rows, so it is
    # placed at the midpoint of their time stamps.
    time = (time[:-1] + time[1:]) // 2

    assert self.parameters["delta_order"] > 0
    win_width = self.parameters["delta_order"]
    # Regression window: taps -W..W divided by 2 * sum(s^2, s = 1..W).
    den = 2 * sum(s**2 for s in range(1, win_width + 1))
    dynamic_win = np.arange(-win_width, win_width + 1) / den

    N, D = ftr.shape
    padded = np.zeros((N + 2 * win_width, D))
    delta_array = np.zeros((N, D))
    self.progressTracker.update(90)

    # Pad by repeating the first and last rows win_width times each.
    padded[:win_width] = ftr[0, :]
    padded[win_width:N + win_width] = ftr
    padded[N + win_width:] = ftr[-1, :]

    # Accumulate one window tap at a time over all rows at once; every
    # element still sees the same additions in the same order as a
    # row-by-row loop would produce.
    for w, weight in enumerate(dynamic_win):
        delta_array += padded[w:w + N] * weight

    value = np.mean(np.diff(delta_array, axis=0)**2, axis=1)**0.5
    dis = TimeValue(
        time,
        value,
        wav.fs,
        wav.duration,
        path=wav.path.with_name(wav.path.stem + "-discont").with_suffix(
            TimeValue.default_suffix),
    )
    dis.min = 0
    dis.max = value.max()
    dis.unit = "dB"
    dis.label = "spectral discontinuity"
    self.progressTracker.update(100)
    return (dis, )
def process(
    self, progressTracker: Optional[DefaultProgressTracker] = None
) -> Tuple[Partition, TimeValue]:
    """Segment the wave in ``self.data`` into two states from a level track.

    The wave is converted to float64 and a spectrogram is computed. Each
    spectrogram row is reduced to a single level value, which is mapped
    through a logistic function centred on ``self.parameters["threshold"]``
    to obtain a two-column score matrix. ``viterbi.search_smooth`` then
    picks the state sequence, using ``self.parameters["smooth"]``.

    Parameters read from ``self.parameters``: ``frame_size``,
    ``frame_rate``, ``threshold`` and ``smooth``.

    Args:
        progressTracker: optional tracker that replaces
            ``self.progressTracker`` before processing starts.

    Returns:
        ``(partition, level_track)``: the state sequence as a ``Partition``
        whose values are formatted as strings, and the per-frame level as
        a ``TimeValue``.
    """
    if progressTracker is not None:
        self.progressTracker = progressTracker
    tracker = self.progressTracker

    wav = self.data.wave
    assert isinstance(wav, Wave)
    wav = wav.convert_dtype(np.float64)
    tracker.update(10)
    assert isinstance(wav, Wave)

    spec, frame_times, _ = dsp.spectrogram(
        wav,
        self.parameters["frame_size"],
        self.parameters["frame_rate"],
    )
    tracker.update(20)

    # Undo the dB scaling, average along axis 1, take the square root
    # and express the result in dB again.
    linear = 10**(spec / 10)
    level = 20 * np.log10(np.mean(linear, axis=1)**0.5)

    # Column 0 shrinks towards 0 as the level rises above the threshold;
    # column 1 is its complement.
    scores = np.empty((len(level), 2))
    scores[:, 0] = 1 / (1 + np.exp(level - self.parameters["threshold"]))
    scores[:, 1] = 1 - scores[:, 0]
    tracker.update(30)

    states, _ = viterbi.search_smooth(scores, self.parameters["smooth"])
    tracker.update(90)

    act_path = wav.path.with_name(wav.path.stem + "-act").with_suffix(
        TimeValue.default_suffix)
    state_track = TimeValue(frame_times, states, wav.fs, wav.duration,
                            act_path)
    par = Partition.from_TimeValue(state_track)
    par.value = np.char.mod("%d", par.value)

    emax_path = wav.path.with_name(wav.path.stem + "-emax").with_suffix(
        TimeValue.default_suffix)
    emax = TimeValue(frame_times, level, wav.fs, wav.duration, emax_path)
    emax.min = level.min()
    emax.max = level.max()
    emax.unit = "dB"
    emax.label = "maximum frequency magnitude"
    return par, emax
def process(
    self, progressTracker: Optional[DefaultProgressTracker] = None
) -> Tuple[TimeValue]:
    """Track a smoothed spectral peak of the wave in ``self.data``.

    A spectrogram is computed and restricted to the frequency bins between
    ``self.parameters["freq_min"]`` and ``self.parameters["freq_max"]``
    (located with ``searchsorted``). ``viterbi.search_smooth`` selects one
    bin per frame, and the frequency of each selected bin is returned.

    Parameters read from ``self.parameters``: ``frame_size``,
    ``frame_rate``, ``NFFT``, ``freq_min``, ``freq_max`` and ``smooth``.

    Args:
        progressTracker: optional tracker that replaces
            ``self.progressTracker`` before processing starts.

    Returns:
        A 1-tuple holding the peak-frequency track, in Hz, with its
        display range set to 0 .. fs / 2.
    """
    if progressTracker is not None:
        self.progressTracker = progressTracker
    tracker = self.progressTracker

    wav = self.data.wave
    tracker.update(10)
    assert isinstance(wav, Wave)

    spec, frame_times, frequency = dsp.spectrogram(
        wav,
        self.parameters["frame_size"],
        self.parameters["frame_rate"],
        NFFT=self.parameters["NFFT"],
    )
    tracker.update(50)

    # Bin range [lo, hi) covering the requested frequency band.
    lo = frequency.searchsorted(self.parameters["freq_min"])
    hi = frequency.searchsorted(self.parameters["freq_max"])
    band = spec[:, lo:hi]

    path_in_band, _ = viterbi.search_smooth(band, self.parameters["smooth"])
    tracker.update(90)

    # The chosen indices are relative to the band, so shift them back by
    # ``lo`` before looking up the frequency.
    peak_hz = frequency[lo + path_in_band]
    out_path = wav.path.with_name(wav.path.stem + "-peak").with_suffix(
        TimeValue.default_suffix)
    trk = TimeValue(frame_times, peak_hz, wav.fs, wav.duration, out_path)
    trk.min = 0
    trk.max = wav.fs / 2
    trk.unit = "Hz"
    trk.label = "frequency"
    return (trk, )
def process(
    self, progressTracker: Optional[DefaultProgressTracker] = None
) -> Tuple[TimeValue, TimeValue, Partition]:
    """Estimate F0, degree of periodicity and voicing for ``self.data.wave``.

    A correlogram of the float64-converted wave is restricted to the lags
    that correspond to ``f0_max`` .. ``f0_min``, rescaled to the range
    0 .. 1, and searched with ``viterbi.search_smooth`` for one lag per
    frame. The correlogram value at the chosen lag is the degree of
    periodicity (dop). A frame is voiced when its dop and its lag-0
    correlogram value both exceed their thresholds; the decision is then
    median-filtered over 5 frames and unvoiced frames get an F0 of NaN.

    Parameters read from ``self.parameters``: ``frame_size``,
    ``frame_rate``, ``f0_min``, ``f0_max``, ``smooth``,
    ``dop threshold`` and ``energy threshold``.

    Args:
        progressTracker: optional tracker that replaces
            ``self.progressTracker`` before processing starts.

    Returns:
        ``(f0, dop, vox)``: the F0 track in Hz, the degree-of-periodicity
        track, and the voicing decision as a ``Partition``.
    """
    if progressTracker is not None:
        self.progressTracker = progressTracker
    wav = self.data.wave
    assert isinstance(wav, Wave)
    wav = wav.convert_dtype(np.float64)
    self.progressTracker.update(10)
    assert isinstance(wav, Wave)

    R, time, _ = dsp.correlogram(wav, self.parameters["frame_size"],
                                 self.parameters["frame_rate"])
    self.progressTracker.update(30)

    t0_min = int(round(wav.fs / self.parameters["f0_max"]))
    t0_max = int(round(wav.fs / self.parameters["f0_min"]))
    # np.intp instead of int16: a lag of fs / f0_min can exceed 32767 for
    # high sampling rates combined with a very low f0_min.
    lags = np.arange(t0_min, t0_max + 1, dtype=np.intp)

    energy = R[:, 0]  # lag-0 column, taken before R is replaced below
    R = R[:, lags]  # only look at valid candidates (index-array copy)

    # Normalize to 0 .. 1.
    # NOTE(review): a constant R makes R.max() zero here and yields NaN;
    # confirm whether that case needs handling.
    R -= R.min()
    R /= R.max()

    # Find the best lag sequence.
    seq, _ = viterbi.search_smooth(R, self.parameters["smooth"])
    self.progressTracker.update(80)

    f0_values = wav.fs / (t0_min + seq)
    # Degree of periodicity: normalized correlogram value at the chosen lag.
    dop_values = R[np.arange(R.shape[0]), seq]

    # Voicing decision.
    voiced = ((dop_values > self.parameters["dop threshold"]) &
              (energy > self.parameters["energy threshold"])
              ).astype(np.int16)
    voiced = signal.medfilt(voiced, 5)  # TODO: replace by a 2-state HMM
    f0_values[voiced == 0] = np.nan

    # Prepare the output tracks.
    f0 = TimeValue(
        time,
        f0_values,
        wav.fs,
        wav.duration,
        wav.path.with_name(wav.path.stem + "-f0").with_suffix(
            TimeValue.default_suffix),
    )
    f0.min = self.parameters["f0_min"]
    f0.max = self.parameters["f0_max"]
    f0.unit = "Hz"
    f0.label = "F0"

    dop = TimeValue(
        time,
        dop_values,
        wav.fs,
        wav.duration,
        wav.path.with_name(wav.path.stem + "-dop").with_suffix(
            TimeValue.default_suffix),
    )
    dop.min = 0
    dop.max = 1
    dop.label = "degree of periodicity"

    vox_track = TimeValue(
        time,
        voiced,
        wav.fs,
        wav.duration,
        wav.path.with_name(wav.path.stem + "-vox").with_suffix(
            TimeValue.default_suffix),
    )
    vox = Partition.from_TimeValue(vox_track)
    vox.label = "voicing"
    assert isinstance(vox, Partition)
    return f0, dop, vox