Example 1
import threading

import matplotlib.pyplot as plt
import numpy as np
import pyaudio

# Separator (an STFT/ISTFT wrapper around the trained per-bin mask
# predictor) is assumed to be importable from elsewhere in the project.

class Filter(object):
    def __init__(self, is_filter=True):
        ## -----*----- Constructor -----*----- ##
        # Separation model
        self.infer = Separator()

        self._pa = pyaudio.PyAudio()
        # Audio I/O settings
        self.settings = {
            'format': pyaudio.paInt16,
            'channels': 1,
            'rate': 8000,
            'chunk': 1024
        }
        self.stream = self._pa.open(format=self.settings['format'],
                                    channels=self.settings['channels'],
                                    rate=self.settings['rate'],
                                    input=True,
                                    output=True,
                                    frames_per_buffer=self.settings['chunk'])

        # Buffers for the raw and separated waveforms
        self.wav = None
        self.wav_separate = None
        # State flags
        self.is_filter = is_filter
        self.is_input = True
        self.is_separate = False
        self.is_end_separate = False

        self.storage = {
            'original': np.zeros(1024 * 20),
            'separated': np.zeros(1024 * 20)
        }

    def exe(self):
        ## -----*----- Run processing -----*----- ##
        thread = threading.Thread(target=self.audio_input)
        thread.start()
        thread = threading.Thread(target=self.audio_separate)
        thread.start()

        self.audio_output()

    def graphplot(self):
        ## -----*----- Plot spectrograms -----*----- ##
        if not self.is_end_separate:
            return
        self.storage['original'] = np.append(
            np.delete(self.storage['original'], range(1024)), self.wav)
        self.storage['separated'] = np.append(
            np.delete(self.storage['separated'], range(1024)),
            self.wav_separate)
        plt.clf()
        # Original
        plt.subplot(311)
        plt.specgram(self.storage['original'], Fs=self.settings['rate'])
        # Separated
        plt.subplot(312)
        plt.specgram(self.storage['separated'], Fs=self.settings['rate'])
        # Pause
        plt.pause(.01)

    def audio_input(self):
        ## -----*----- Audio input -----*----- ##
        while self.stream.is_active():
            # Read one chunk of 16-bit samples from the microphone
            self.wav = np.frombuffer(
                self.stream.read(self.settings['chunk'],
                                 exception_on_overflow=False), dtype=np.int16)

            self.graphplot()

            # Flip the flags: hand the chunk over to the separation thread
            self.is_input = False
            self.is_separate = True

    def audio_output(self):
        ## -----*----- Audio output -----*----- ##
        while self.stream.is_active():
            if not self.is_input:
                # Signal the input thread to read the next chunk
                self.is_input = True

                # Play back the most recent separated chunk
                if self.is_end_separate:
                    # istft is assumed to return a float ndarray;
                    # convert it to 16-bit PCM bytes for playback
                    output = self.wav_separate.astype(np.int16).tobytes()
                    self.stream.write(output)

    def audio_separate(self):
        ## -----*----- Audio separation -----*----- ##
        while self.stream.is_active():
            if self.is_separate:
                spec = self.infer.stft(self.wav, to_log=False).T
                spec_pred = self.infer.stft(self.wav, to_log=True).T

                # Apply the predicted mask bin by bin
                if self.is_filter:
                    for t in range(spec.shape[0]):
                        pred = self.infer.predict(spec_pred[t])
                        for i in range(129):
                            # Cut the two lowest-frequency bins
                            if i < 2:
                                spec[t][i] = 0
                                continue
                            # Tiered soft mask: keep confident bins and
                            # attenuate the rest progressively
                            if pred[i] > 0.8:
                                spec[t][i] *= 1.0
                            elif pred[i] > 0.75:
                                spec[t][i] *= pred[i]
                            elif pred[i] > 0.6:
                                spec[t][i] *= 0.005
                            elif pred[i] > 0.5:
                                spec[t][i] *= 0.002
                            else:
                                spec[t][i] *= 0.001

                self.wav_separate = self.infer.istft(spec.T)

                self.is_separate = False
                self.is_end_separate = True
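
A minimal way to run this class, as a sketch (it assumes Separator and its trained model are available; the __main__ guard is an addition, not part of the original source):

if __name__ == '__main__':
    # audio_output() blocks on the main thread; audio_input() and
    # audio_separate() are started on worker threads by exe().
    Filter(is_filter=True).exe()
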
Example 2
import numpy as np
import pyaudio

# Separator is assumed to be importable from the project; the stft/istft
# helpers are free functions (see the sketch after this example).

CHUNK = 1024

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16,
                channels=1,
                rate=8000,
                frames_per_buffer=CHUNK,
                input=True,
                output=True)

infer = Separator()
while stream.is_active():
    wav = np.frombuffer(stream.read(CHUNK, exception_on_overflow=False),
                        dtype=np.int16)
    print(wav.shape)
    spec = stft(wav, False).T
    spec_pred = stft(wav, True).T
    for t in range(spec.shape[0]):
        pred = infer.predict(spec_pred[t])
        for i in range(129):
            if pred[i] > 0.75:
                spec[t][i] *= pred[i]
            elif pred[i] > 0.5:
                spec[t][i] *= 0.1
            else:
                spec[t][i] = 0

    wav = istft(spec.T)
    # istft is assumed to return a float ndarray; convert to 16-bit PCM bytes
    output = wav.astype(np.int16).tobytes()
    stream.write(output)

stream.stop_stream()
stream.close()
p.terminate()
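
The hard-coded range(129) implies a 256-point FFT (256 // 2 + 1 = 129 frequency bins). The stft/istft helpers used above are not shown in the source; a minimal sketch of what they might look like, assuming scipy.signal with its default Hann window at the 8 kHz rate:

import numpy as np
from scipy import signal

def stft(wav, to_log, rate=8000, nperseg=256):
    # 256-point STFT -> (129 bins, n_frames); transposed by the caller
    _, _, spec = signal.stft(wav.astype(np.float32), fs=rate, nperseg=nperseg)
    if to_log:
        # Log-magnitude features for the mask predictor
        return np.log10(np.abs(spec) + 1e-9)
    return spec  # complex spectrogram, masked and then inverted

def istft(spec, rate=8000, nperseg=256):
    # Invert the (possibly masked) complex spectrogram back to a waveform
    _, wav = signal.istft(spec, fs=rate, nperseg=nperseg)
    return wav
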
Example 3
import threading

import numpy as np
import pyaudio

# Separator is assumed to be importable from the project.

class Filter(object):
    def __init__(self, is_filter=True):
        ## -----*----- Constructor -----*----- ##
        # Separation model
        self.infer = Separator()

        self._pa = pyaudio.PyAudio()
        # Audio I/O settings
        self.settings = {
            'format': pyaudio.paInt16,
            'channels': 1,
            'rate': 8000,
            'chunk': 1024
        }
        self.stream = self._pa.open(format=self.settings['format'],
                                    channels=self.settings['channels'],
                                    rate=self.settings['rate'],
                                    input=True,
                                    output=True,
                                    frames_per_buffer=self.settings['chunk'])

        # Buffers for the raw and separated waveforms
        self.wav = None
        self.wav_separate = None
        # State flags
        self.is_filter = is_filter
        self.is_input = True
        self.is_separate = False
        self.is_end_separate = False

    def exe(self):
        ## -----*----- Run processing -----*----- ##
        thread = threading.Thread(target=self.audio_input)
        thread.start()
        thread = threading.Thread(target=self.audio_separate)
        thread.start()

        self.audio_output()

    def audio_input(self):
        ## -----*----- Audio input -----*----- ##
        while self.stream.is_active():
            # Read one chunk of 16-bit samples from the microphone
            self.wav = np.frombuffer(
                self.stream.read(self.settings['chunk'],
                                 exception_on_overflow=False), dtype=np.int16)

            # Flip the flags: hand the chunk over to the separation thread
            self.is_input = False
            self.is_separate = True

    def audio_output(self):
        ## -----*----- Audio output -----*----- ##
        while self.stream.is_active():
            if not self.is_input:
                # Signal the input thread to read the next chunk
                self.is_input = True

                # Play back the most recent separated chunk
                if self.is_end_separate:
                    # istft is assumed to return a float ndarray;
                    # convert it to 16-bit PCM bytes for playback
                    output = self.wav_separate.astype(np.int16).tobytes()
                    self.stream.write(output)

    def audio_separate(self):
        ## -----*----- Audio separation -----*----- ##
        while self.stream.is_active():
            if self.is_separate:
                spec = self.infer.stft(self.wav, to_log=False).T
                spec_pred = self.infer.stft(self.wav, to_log=True).T

                # Apply the predicted mask bin by bin
                if self.is_filter:
                    for t in range(spec.shape[0]):
                        pred = self.infer.predict(spec_pred[t])
                        for i in range(129):
                            if pred[i] > 0.75:
                                spec[t][i] *= pred[i]
                            elif pred[i] > 0.5:
                                spec[t][i] *= 0.1
                            else:
                                spec[t][i] = 0

                self.wav_separate = self.infer.istft(spec.T)

                self.is_separate = False
                self.is_end_separate = True
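
To exercise either Filter class without the trained model, Separator can be replaced by a stub exposing the same surface (stft, istft, predict). This DummySeparator is hypothetical, assuming scipy.signal and the 8 kHz / 256-point settings used above:

import numpy as np
from scipy import signal

class DummySeparator(object):
    # Hypothetical stand-in for the project's Separator: same method
    # surface, but predict() returns a fixed per-bin mask instead of
    # running a trained model.
    def stft(self, wav, to_log=False):
        _, _, spec = signal.stft(wav.astype(np.float32), fs=8000, nperseg=256)
        return np.log10(np.abs(spec) + 1e-9) if to_log else spec

    def istft(self, spec):
        _, wav = signal.istft(spec, fs=8000, nperseg=256)
        return wav

    def predict(self, frame):
        # One mask value per frequency bin (129 for a 256-point FFT)
        return np.full(frame.shape[0], 0.9)

With a flat 0.9 mask every chunk passes through nearly unchanged, which makes it easy to verify the threading and playback path before involving the model.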