class MelBank(Recording):
    """Voice activity detector driving the recorder/separator pipeline.

    Reads a dedicated Float32 microphone stream, tracks a running volume
    average, detects rising/falling volume edges against an adaptive
    threshold, and toggles the parent ``Recording`` events accordingly.
    When a take ends it kicks off source separation on a sub-thread.
    """

    def __init__(self):
        ## -----*----- Constructor -----*----- ##
        super().__init__()
        # Source separator
        self.infer = Separator()
        # Unlike the parent class's stream: Float32 samples at a higher
        # sampling rate (settings rate * 4).
        self.f_stream = self._pa.open(
            format=pyaudio.paFloat32,
            channels=self.settings['channels'],
            rate=self.settings['rate'] * 4,
            input=True,
            output=False,
            frames_per_buffer=self.settings['chunk']
        )
        # Rising / falling edge hit counters
        self.cnt_edge = {'up': 0, 'down': 0}
        # Volume / threshold bookkeeping
        self.state = {'amp': 0, 'total': 0, 'cnt': 0, 'border': 9999, 'average': 0}
        # Console renderer
        self.console = Console('./config/outer.txt')
        # ANSI color codes: 90 = grey (idle), 32 = green (active)
        self.color_record = 90
        self.color_separate = 90
        self.is_separate = False

    def start(self):
        ## -----*----- Start detection -----*----- ##
        time.sleep(self.settings['past_second'])
        # Sub-thread that keeps the detection threshold up to date
        thread = threading.Thread(target=self.update_border)
        thread.start()
        self.pastTime = time.time()
        while not self.is_exit:
            try:
                # Periodically decay the running statistics so the average
                # tracks the recent signal rather than all history.
                if time.time() - self.pastTime > 0.5:
                    self.reset_state()
                self.state['cnt'] += 1
                self.detection()
                sys.stdout.flush()
            except KeyboardInterrupt:
                os.system('clear')
                self.is_exit = True

    def detection(self):
        ## -----*----- Rising / falling edge detection -----*----- ##
        # BUG FIX: np.fromstring was deprecated and removed in NumPy >= 1.24;
        # np.frombuffer is the replacement. It returns a read-only view, so
        # copy before the in-place window multiplication below.
        voiceData = np.frombuffer(
            self.f_stream.read(self.settings['chunk'], exception_on_overflow=False),
            np.float32).copy()
        voiceData *= np.hanning(self.settings['chunk'])
        # Amplitude spectrum (0 ~ 8000 [Hz])
        x = np.fft.fft(voiceData)
        # Power (amplitude) spectrum — vectorized |X| instead of a per-bin loop
        amplitudeSpectrum = np.abs(x)
        # Band-pass filter (100 ~ 5000 [Hz])
        bins_per_hz = self.settings['chunk'] / (self.settings['rate'] * 2)
        amplitudeSpectrum = amplitudeSpectrum[int(bins_per_hz * 100):int(bins_per_hz * 5000)]
        # Current amplitude and running average
        self.state['amp'] = np.sum(amplitudeSpectrum)
        self.state['total'] += self.state['amp']
        self.state['average'] = self.state['total'] / self.state['cnt']
        # Console output
        self.console.draw(
            int(self.state['average']), int(self.state['amp']), int(self.state['border']),
            '\033[{0}m録音中\033[0m'.format(self.color_record),
            '\033[{0}m分離中\033[0m'.format(self.color_separate),
            *self.meter())
        if self.is_separate and self.record_end.is_set():
            # Recording just finished: run separation on its own thread.
            thread = threading.Thread(target=self.separator)
            thread.start()
            self.is_separate = False
        else:
            # Rising edge: start a recording (only while not separating)
            if self.up_edge() and self.record_end.is_set() and self.color_separate == 90:
                self.record_start.set()
                self.record_end.clear()
                self.state['border'] = self.state['average']
                self.color_record = 32
            # Falling edge: stop the recording and schedule separation
            if self.down_edge() and (not self.record_end.is_set()):
                self.record_start.clear()
                self.reset_state()
                self.color_record = 90
                self.is_separate = True
                self.color_separate = 32

    def up_edge(self):
        ## -----*----- Rising edge detection -----*----- ##
        # Requires the amplitude to exceed the threshold on more than 5
        # chunks before firing, to avoid triggering on transients.
        if not self.record_start.is_set():
            if self.state['amp'] >= self.state['border']:
                self.cnt_edge['up'] += 1
                if self.cnt_edge['up'] > 5:
                    return True
        return False

    def down_edge(self):
        ## -----*----- Falling edge detection -----*----- ##
        # Requires the average to stay below the threshold on more than
        # 10 chunks; resets both edge counters when it fires.
        if self.record_start.is_set():
            if self.state['average'] <= self.state['border']:
                self.cnt_edge['down'] += 1
                if self.cnt_edge['down'] > 10:
                    self.cnt_edge['up'] = self.cnt_edge['down'] = 0
                    return True
        return False

    def reset_state(self):
        ## -----*----- Reset running statistics -----*----- ##
        # Keep the current average but pretend it was built from 15
        # samples, so new input re-weights the average quickly.
        self.state['total'] = self.state['average'] * 15
        self.state['cnt'] = 15
        if self.state['average'] >= self.state['amp']:
            self.cnt_edge['up'] = 0
        self.pastTime = time.time()

    def update_border(self):
        ## -----*----- Threshold update (sub-thread) -----*----- ##
        # Empirical power-law mapping from average volume to threshold.
        # NOTE: the original also computed an offset-table index from
        # range(50, 201, 10) that was never used; that dead code is removed.
        while not self.is_exit:
            time.sleep(0.2)
            if self.cnt_edge['up'] < 3 and not self.record_start.is_set():
                self.state['border'] = pow(10, 1.13) * pow(self.state['average'], 0.72)

    def meter(self):
        ## -----*----- Build console volume meters -----*----- ##
        # Three bars: average / amp / border. The active comparison bar is
        # highlighted in blue while recording.
        keys = ('average', 'amp', 'border')
        meter = ['■' * int(self.state[k] / 20 + 3) for k in keys]
        if self.record_start.is_set():
            if self.state['average'] >= self.state['border']:
                meter[0] = '\033[94m' + meter[0] + '\033[0m'
            elif self.state['amp'] >= self.state['border']:
                meter[1] = '\033[94m' + meter[1] + '\033[0m'
        return meter

    def separator(self):
        ## -----*----- Source separation -----*----- ##
        # self.file is presumably set by the Recording parent — TODO confirm.
        self.infer.separate(self.file)
        self.player('./tmp/separate.wav')
        self.color_separate = 90

    def player(self, path):
        ## -----*----- Play back a wav file -----*----- ##
        pa = pyaudio.PyAudio()
        wavFile = wave.open(path, 'rb')
        stream = pa.open(
            format=pa.get_format_from_width(wavFile.getsampwidth()),
            channels=wavFile.getnchannels(),
            rate=wavFile.getframerate(),
            output=True,
        )
        voiceData = wavFile.readframes(self.settings['chunk'])
        while len(voiceData) > 0:
            stream.write(voiceData)
            voiceData = wavFile.readframes(self.settings['chunk'])
        stream.stop_stream()
        stream.close()
        # BUG FIX: the wave file handle was never closed (resource leak).
        wavFile.close()
        pa.terminate()
class RealTimeAudioSeparator(object):
    """Interactive real-time audio separation front-end.

    Builds timbre dictionaries from wav files (or the microphone),
    separates the live input against them on a worker thread, and
    streams the activation results over OSC.
    """

    def __init__(self, chunk=1024):
        """Constructor.

        Keyword Arguments:
            chunk {int} -- FFT/buffer size in samples (default: {1024})
        """
        self.chunk = chunk
        self.ac = AudioController()
        # Overlap buffer: keeps the previous half-frame for 50% overlap.
        self.buffer = np.zeros((self.chunk), dtype=np.float32)
        self.window_func = np.hamming(self.chunk)
        self.osc = OscClient("localhost", 5000)
        self.separate_results = None
        self.timbers = None
        self.separate_flag = True
        self.sep_thread = None
        self.osc_thread = None

    def calc_spectrum(self, data):
        """Compute the normalized amplitude spectrum of one half-frame.

        Shifts the second half of the previous buffer down and appends
        *data* (must be exactly chunk//2 samples), giving 50% overlap.

        Arguments:
            data {numpy.array} -- chunk//2 new input samples

        Returns:
            numpy.array -- amplitude spectrum (chunk//2 bins), sums to 1
        """
        half = self.chunk // 2
        self.buffer[:half] = self.buffer[half:]
        self.buffer[half:] = data
        F = np.fft.fft(self.buffer * self.window_func)
        amp = np.abs(F)[:half]
        return amp / np.sum(amp)

    def setup(self):
        """Build the initial timbre dictionary from files in ./audio/."""
        files = glob.glob("./audio/*")
        if len(files) == 0:
            print("audioディレクトリにwavファイルを置いてください")
            exit(1)
        self.separator = Separator(self.chunk // 2, 1, len(files))
        half = self.chunk // 2
        for i, f in enumerate(files):
            name = f.split("/")[-1].split('.')[0]
            data, ch, fs = self.ac.read_wav(f)
            spectrum = np.zeros((half), dtype=np.float32)
            n_frames = 0
            # Iterate complete frames only: a short final slice would break
            # calc_spectrum's fixed-size buffer assignment (BUG FIX).
            for start in range(0, len(data) - half + 1, half):
                spectrum += self.calc_spectrum(data[start:start + half])
                n_frames += 1
            # BUG FIX: the original divided by the last loop *offset* `j`
            # instead of the number of frames, mis-scaling the dictionary
            # and raising ZeroDivisionError on very short files.
            self.separator.set_dictionary(spectrum / max(n_frames, 1), i, name=name)

    def run(self):
        """Main interactive loop: start separation and dispatch commands."""
        print("======================= RUN =======================")
        # NOTE: 'chunck' (sic) is the AudioController API's keyword name.
        self.ac.setup_stream(chunck=self.chunk // 2)
        with conf.ThreadPoolExecutor(max_workers=2) as executor:
            self.start_separate(executor)
            while True:
                print("> ", end="")
                line = input()
                if line == 'a':
                    self.stop_separate(executor)
                    self.add_timber()
                    self.start_separate(executor)
                elif line == 'c':
                    self.stop_separate(executor)
                    self.change_timber()
                    self.start_separate(executor)
                elif line == 'q':
                    self.stop_separate(executor)
                    break
                elif line == 's':
                    self.print_timber_list()
                elif line == 'h':
                    print('\'a\' is add timber.')
                    print('\'c\' is change timber.')
                    print('\'s\' is show timber list.')
                    print('\'q\' is shutdown application.')
        self.ac.close_stream()

    def print_countdown(self):
        """Show the recording countdown."""
        print('please input audio {}[sec]!!!'.format(TIME))
        print('============3============')
        time.sleep(1)
        print('============2============')
        time.sleep(1)
        print('============1============')
        time.sleep(1)
        print('RECORD!!')

    def print_timber_list(self):
        """Print the currently registered timbres."""
        timbers = self.separator.get_timber()
        for k, v in timbers.items():
            print("Timber:{} : {}".format(k, v))

    def add_timber(self):
        """Record a new timbre and append it to the dictionary."""
        self.print_countdown()
        spectrum = self.record()
        self.separator.add_dictionary(spectrum)
        print("finish add")

    def change_timber(self):
        """Replace a registered timbre with a newly recorded one."""
        timber_index = -1
        timber_name = None
        while True:
            print(
                'Please input [timber_index,timber_name] to change. (cancel is [q] key)'
            )
            self.print_timber_list()
            print("> ", end="")
            line = input()
            if line == 'q':
                return
            parts = line.split(",")
            if len(parts) != 2:
                continue
            if not parts[0].isdecimal():
                continue
            timber_index, timber_name = int(parts[0]), parts[1]
            if not 0 <= timber_index <= len(self.separator.get_timber()) - 1:
                print('[error] index out of range.')
                continue
            break
        self.print_countdown()
        spectrum = self.record()
        self.separator.set_dictionary(spectrum, timber_index, name=timber_name)
        print('finish change')

    def record(self):
        """Record TIME seconds from the microphone.

        Returns:
            numpy.array -- average normalized spectrum of the take
        """
        counter = 0
        it = 0
        spectrum = np.zeros((self.chunk // 2), dtype=np.float32)
        self.ac.clear_buffer()
        self.ac.start_stream()
        while counter < TIME:
            # Idiom fix: len(q) instead of q.__len__()
            if len(self.ac.q) > 0:
                data = self.ac.q.popleft()
                spectrum += self.calc_spectrum(data)
                counter += self.ac.chunk / self.ac.rate
                it += 1
        self.ac.stop_stream()
        return spectrum / it

    def start_separate(self, executor):
        """Start the separation and OSC-sending worker threads.

        Arguments:
            executor {conf.ThreadPoolExecutor} -- shared thread pool
        """
        self.timbers = self.separator.get_timber()
        # Always advertise a trailing "noise" timbre.
        if "noise" not in list(self.timbers.values()):
            self.timbers[len(self.timbers)] = "noise"
        msg = [len(self.timbers)]
        msg.extend(self.timbers.values())
        self.osc.send(OSC_TIMBER_ADDR, msg)
        self.ac.clear_buffer()
        self.ac.start_stream()
        self.separate_flag = True
        self.sep_thread = executor.submit(self.separate_sound)
        self.osc_thread = executor.submit(self.send_result)

    def stop_separate(self, executor):
        """Stop the worker threads and the audio stream.

        Arguments:
            executor {conf.ThreadPoolExecutor} -- shared thread pool
        """
        self.separate_flag = False
        # .result() blocks until each worker loop exits (and re-raises
        # any exception from the worker).
        self.sep_thread.result()
        self.sep_thread = None
        self.osc_thread.result()
        self.osc_thread = None
        self.ac.clear_buffer()
        self.ac.stop_stream()

    def separate_sound(self):
        """Worker: pull audio chunks and run the separation."""
        while self.separate_flag:
            if len(self.ac.q) > 0:
                data = self.ac.q.popleft()
                spectrum = self.calc_spectrum(data)
                self.separate_results = self.separator.separate(spectrum)

    def send_result(self):
        """Worker: push separation activations over OSC every OSC_FREQ sec."""
        while self.separate_flag:
            if self.separate_results is None:
                continue  # NOTE: busy-waits until the first result arrives
            if self.separate_results.shape[0] != len(self.timbers):
                continue
            response = [self.separate_results[i][0]
                        for i, _ in enumerate(self.timbers.values())]
            self.osc.send(OSC_RESULT_ADDR, response)
            time.sleep(OSC_FREQ)
# Interactive driver: each ENTER press toggles between starting and
# stopping a take; after a take ends it is separated and played back.
infer = Separator()
record = Recording()
os.system('clear')
print('*** ENTERを押して録音開始・終了 ***')
take = 1
recording = False  # False: next ENTER starts a take, True: next ENTER stops it
while True:
    input()
    if not recording:
        # Begin a new take.
        print("===== {0} START ===============".format(take))
        record.record_start.set()
        record.record_end.clear()
        recording = True
    else:
        # Stop, wait for the recorder to finish flushing, then separate and play.
        print("===== END ===============")
        record.record_start.clear()
        while not record.record_end.is_set():
            pass
        infer.separate('./tmp/source.wav')
        player('./tmp/separate.wav')
        recording = False
        take += 1