예제 #1
0
 def process(
     self,
     progressTracker: Optional[DefaultProgressTracker] = None
 ) -> Tuple[TimeValue]:
     """Compute a frame-wise energy track in dB for ``self.data.wave``.

     The wave is converted to float64 and cut into frames by ``dsp.frame``
     using the ``frame_size`` and ``frame_rate`` parameters. Each frame is
     weighted with a Hann window and its RMS is expressed as
     ``20 * log10(rms)``.

     Args:
         progressTracker: Optional tracker. When given, it replaces
             ``self.progressTracker`` before any progress is reported.

     Returns:
         A 1-tuple holding the energy ``TimeValue``. Its ``min``/``max``
         are set to the extrema of the computed values, its unit is
         ``"dB"`` and its path is the wave path with ``-energy`` appended
         to the stem and ``TimeValue.default_suffix`` as suffix.

     Note:
         A frame whose windowed samples are all zero has an RMS of 0, so
         its value is ``-inf`` (``log10(0)``).
     """
     if progressTracker is not None:
         self.progressTracker = progressTracker
     wav = self.data.wave
     assert isinstance(wav, Wave)
     wav = wav.convert_dtype(np.float64)
     self.progressTracker.update(10)
     assert isinstance(wav, Wave)
     frame = dsp.frame(wav, self.parameters["frame_size"],
                       self.parameters["frame_rate"])
     self.progressTracker.update(70)
     # Window functions live in scipy.signal.windows; the top-level
     # ``signal.hann`` alias was deprecated and removed in SciPy 1.13.
     window = signal.windows.hann(frame.value.shape[1])
     # Multiply into a new array so the frame data itself is left untouched.
     windowed = frame.value * window
     value = 20 * np.log10(np.mean(windowed**2.0, axis=1)**0.5)
     self.progressTracker.update(90)
     nrg = TimeValue(
         frame.time,
         value,
         wav.fs,
         wav.duration,
         path=wav.path.with_name(wav.path.stem + "-energy").with_suffix(
             TimeValue.default_suffix),
     )
     nrg.min = value.min()
     nrg.max = value.max()
     nrg.unit = "dB"
     return (nrg, )
    def process(
        self,
        progressTracker: Optional[DefaultProgressTracker] = None
    ) -> Tuple[TimeValue]:
        """Compute a spectral discontinuity track for ``self.data.wave``.

        Steps:
          1. Compute a spectrogram with ``dsp.spectrogram``; ``frame_size``
             is passed for both the frame size and the frame rate.
          2. If the ``normalized`` parameter is truthy, subtract the
             per-row mean from the features.
          3. Compute regression ("delta") coefficients over a window of
             ``delta_order`` rows on each side, repeating the first/last
             row as padding.
          4. Take the RMS (over columns) of the difference between
             consecutive delta rows.

        Args:
            progressTracker: Optional tracker. When given, it replaces
                ``self.progressTracker`` before any progress is reported.

        Returns:
            A 1-tuple holding the discontinuity ``TimeValue``. It has one
            value per pair of consecutive rows, positioned at the
            floor-divided midpoint of their two time stamps.

        Raises:
            AssertionError: if ``self.data.wave`` is not a ``Wave`` or the
                ``delta_order`` parameter is not positive.
        """
        if progressTracker is not None:
            self.progressTracker = progressTracker
        wav = self.data.wave
        assert isinstance(wav, Wave)
        self.progressTracker.update(10)
        ftr, time, _frequency = dsp.spectrogram(
            wav,
            self.parameters["frame_size"],
            self.parameters["frame_size"],  # frame_rate = frame_size
            NFFT=self.parameters["NFFT"],
            normalized=self.parameters["normalized"],
        )
        if self.parameters["normalized"]:
            ftr = ftr - np.mean(ftr, axis=1).reshape(-1, 1)

        # One output value per pair of neighbouring rows -> midpoint times.
        time = (time[:-1] + time[1:]) // 2
        win_width = self.parameters["delta_order"]
        assert win_width > 0
        win_length = 2 * win_width + 1
        # Regression weights s / (2 * sum_{s=1..W} s^2) for s = -W..W.
        den = 2 * sum(s**2 for s in range(1, win_width + 1))
        dynamic_win = np.arange(-win_width, win_width + 1) / den

        N, D = ftr.shape
        temp_array = np.zeros((N + 2 * win_width, D))
        self.progressTracker.update(90)
        # Pad by repeating the first and last rows win_width times each.
        temp_array[win_width:N + win_width] = ftr
        temp_array[:win_width] = ftr[0, :]
        temp_array[N + win_width:] = ftr[-1, :]

        # Accumulate one window tap at a time over all rows at once; every
        # element sees the same additions in the same order as a per-row
        # loop would perform.
        delta_array = np.zeros((N, D))
        for w in range(win_length):
            delta_array += temp_array[w:w + N, :] * dynamic_win[w]

        value = np.mean(np.diff(delta_array, axis=0)**2, axis=1)**0.5
        dis = TimeValue(
            time,
            value,
            wav.fs,
            wav.duration,
            path=wav.path.with_name(wav.path.stem + "-discont").with_suffix(
                TimeValue.default_suffix),
        )
        dis.min = 0
        dis.max = value.max()
        dis.unit = "dB"
        dis.label = "spectral discontinuity"
        self.progressTracker.update(100)
        return (dis, )
예제 #3
0
 def process(
     self,
     progressTracker: Optional[DefaultProgressTracker] = None
 ) -> Tuple[Partition, TimeValue]:
     """Segment ``self.data.wave`` into two states from a per-frame level.

     A spectrogram is computed with the ``frame_size`` and ``frame_rate``
     parameters. Each row is reduced to one level value (see the inline
     formula). The level is mapped through a logistic function centred on
     the ``threshold`` parameter to obtain two complementary state
     probabilities, which ``viterbi.search_smooth`` decodes using the
     ``smooth`` parameter.

     Args:
         progressTracker: Optional tracker. When given, it replaces
             ``self.progressTracker`` before any progress is reported.

     Returns:
         ``(partition, level)``: a ``Partition`` derived from the decoded
         state sequence, with its values formatted as ``"%d"`` strings,
         and a ``TimeValue`` holding the per-frame level in dB.
     """
     if progressTracker is not None:
         self.progressTracker = progressTracker
     source = self.data.wave
     assert isinstance(source, Wave)
     source = source.convert_dtype(np.float64)
     self.progressTracker.update(10)
     assert isinstance(source, Wave)
     settings = self.parameters
     spec, frame_times, _ = dsp.spectrogram(source,
                                            settings["frame_size"],
                                            settings["frame_rate"])
     self.progressTracker.update(20)

     # Per-row level: 20 * log10(sqrt(mean(10 ** (spec / 10)))).
     linear = 10**(spec / 10)
     level_db = 20 * np.log10(np.mean(linear, axis=1)**0.5)

     # Column 0 is close to 1 when the level is below the threshold;
     # column 1 is its complement.
     state_prob = np.empty((len(level_db), 2))
     state_prob[:, 0] = 1 / (1 + np.exp(level_db - settings["threshold"]))
     state_prob[:, 1] = 1 - state_prob[:, 0]
     self.progressTracker.update(30)

     states, _ = viterbi.search_smooth(state_prob, settings["smooth"])
     self.progressTracker.update(90)

     def derived_path(tag):
         # Wave path with `tag` appended to the stem and the default suffix.
         renamed = source.path.with_name(source.path.stem + tag)
         return renamed.with_suffix(TimeValue.default_suffix)

     state_track = TimeValue(frame_times, states, source.fs,
                             source.duration, derived_path("-act"))
     par = Partition.from_TimeValue(state_track)
     par.value = np.char.mod("%d", par.value)

     emax = TimeValue(frame_times, level_db, source.fs, source.duration,
                      derived_path("-emax"))
     emax.min = level_db.min()
     emax.max = level_db.max()
     emax.unit = "dB"
     emax.label = "maximum frequency magnitude"
     return par, emax
예제 #4
0
 def process(
     self,
     progressTracker: Optional[DefaultProgressTracker] = None
 ) -> Tuple[TimeValue]:
     """Track one frequency per frame through the spectrogram of the wave.

     The spectrogram (``frame_size``, ``frame_rate`` and ``NFFT``
     parameters) is restricted to the columns whose frequencies fall
     between the ``freq_min`` and ``freq_max`` parameters, as located by
     ``searchsorted`` on the frequency axis. ``viterbi.search_smooth``
     then picks one column index per row using the ``smooth`` parameter.

     Args:
         progressTracker: Optional tracker. When given, it replaces
             ``self.progressTracker`` before any progress is reported.

     Returns:
         A 1-tuple holding a ``TimeValue`` with the selected frequency for
         each frame, in Hz, with range ``0 .. wav.fs / 2``.
     """
     if progressTracker is not None:
         self.progressTracker = progressTracker
     wav = self.data.wave
     self.progressTracker.update(10)
     assert isinstance(wav, Wave)

     settings = self.parameters
     spec, frame_times, freq_axis = dsp.spectrogram(
         wav,
         settings["frame_size"],
         settings["frame_rate"],
         NFFT=settings["NFFT"],
     )
     self.progressTracker.update(50)

     # Column range [lo, hi) covering the requested frequency band.
     lo = freq_axis.searchsorted(settings["freq_min"])
     hi = freq_axis.searchsorted(settings["freq_max"])
     band = spec[:, lo:hi]
     best, _ = viterbi.search_smooth(band, settings["smooth"])
     self.progressTracker.update(90)

     # `best` indexes into the band, so shift by `lo` to index the full axis.
     tracked = freq_axis[lo + best]
     out_path = wav.path.with_name(wav.path.stem + "-peak")
     out_path = out_path.with_suffix(TimeValue.default_suffix)
     trk = TimeValue(frame_times, tracked, wav.fs, wav.duration, out_path)
     trk.min = 0
     trk.max = wav.fs / 2
     trk.unit = "Hz"
     trk.label = "frequency"
     return (trk, )
예제 #5
0
    def process(
        self,
        progressTracker: Optional[DefaultProgressTracker] = None
    ) -> Tuple[TimeValue, TimeValue, Partition]:
        """Estimate F0, degree of periodicity and voicing for the wave.

        A correlogram is computed with ``dsp.correlogram`` (``frame_size``
        and ``frame_rate`` parameters). Only lags between
        ``round(fs / f0_max)`` and ``round(fs / f0_min)`` samples are kept
        as candidates. The candidate scores are rescaled to start at 0 and
        peak at 1, and ``viterbi.search_smooth`` (``smooth`` parameter)
        selects one lag per frame. A frame is voiced when its selected
        score exceeds the ``"dop threshold"`` parameter and lag-0 of the
        correlogram exceeds the ``"energy threshold"`` parameter; the
        decision is smoothed with a 5-point median filter. F0 is set to
        NaN in unvoiced frames.

        Args:
            progressTracker: Optional tracker. When given, it replaces
                ``self.progressTracker`` before any progress is reported.

        Returns:
            ``(f0, dop, vox)``: the F0 ``TimeValue`` in Hz, the degree of
            periodicity ``TimeValue`` (range 0..1) and the voicing
            ``Partition``.
        """
        if progressTracker is not None:
            self.progressTracker = progressTracker
        wav = self.data.wave
        assert isinstance(wav, Wave)
        wav = wav.convert_dtype(np.float64)
        self.progressTracker.update(10)
        assert isinstance(wav, Wave)
        R, time, frequency = dsp.correlogram(wav,
                                             self.parameters["frame_size"],
                                             self.parameters["frame_rate"])

        self.progressTracker.update(30)
        assert isinstance(wav, Wave)
        t0_min = int(round(wav.fs / self.parameters["f0_max"]))
        t0_max = int(round(wav.fs / self.parameters["f0_min"]))
        # Use the native index type: int16 cannot represent lags above
        # 32767 samples (high sampling rate combined with a low f0_min).
        index = np.arange(t0_min, t0_max + 1, dtype=np.intp)
        E = R[:, 0]  # energy
        # Index-array selection yields a new array (assuming R is a NumPy
        # array), so the in-place normalization below does not alter E.
        R = R[:, index]  # only look at valid candidates
        # normalize to [0, 1]; skip the division when every candidate is
        # equal (peak of 0) to avoid filling R with 0/0 = NaN
        R -= R.min()
        peak = R.max()
        if peak > 0:
            R /= peak
        # find best sequence
        seq, _ = viterbi.search_smooth(R, self.parameters["smooth"])
        self.progressTracker.update(80)

        f0 = wav.fs / (t0_min + seq)
        # degree of periodicity: normalized score of the selected lag
        dop = R[np.arange(R.shape[0]), seq]
        # voicing
        v = ((dop > self.parameters["dop threshold"])
             & (E > self.parameters["energy threshold"])).astype(np.int16)
        v = signal.medfilt(v, 5)  # TODO: replace by a 2-state HMM
        f0[v == 0] = np.nan

        def track_path(tag):
            # Wave path with `tag` appended to the stem, default suffix.
            return wav.path.with_name(wav.path.stem + tag).with_suffix(
                TimeValue.default_suffix)

        # prepare tracks
        f0 = TimeValue(time, f0, wav.fs, wav.duration, track_path("-f0"))
        f0.min = self.parameters["f0_min"]
        f0.max = self.parameters["f0_max"]
        f0.unit = "Hz"
        f0.label = "F0"
        dop = TimeValue(time, dop, wav.fs, wav.duration, track_path("-dop"))
        dop.min = 0
        dop.max = 1
        dop.label = "degree of periodicity"
        vox = TimeValue(time, v, wav.fs, wav.duration, track_path("-vox"))
        vox = Partition.from_TimeValue(vox)
        vox.label = "voicing"
        assert isinstance(f0, TimeValue)
        assert isinstance(dop, TimeValue)
        assert isinstance(vox, Partition)
        return f0, dop, vox