def get_train_data(pad, mode):
    # load data
    train, test = load_data(CORPUS_PATH)

    # init separator
    sep = Separator(mode, SENTENCEPIECE_PATH)
    sep.train(train)
    sentencepiece = mode == "sentencepiece"

    # split text
    train = sep.sep_df_text(train, sentencepiece)
    test = sep.sep_df_text(test, sentencepiece)

    # word to id
    str2idx = Str2idx(train)
    n_vocab = str2idx.get_n_vocab()
    train = str2idx(train, pad)
    test = str2idx(test, pad)

    return train, test, n_vocab
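# A minimal usage sketch for get_train_data, assuming CORPUS_PATH and
# SENTENCEPIECE_PATH are configured elsewhere and that "sentencepiece" is a
# valid mode (implied by the comparison above); pad=32 is an illustrative value.
if __name__ == '__main__':
    train, test, n_vocab = get_train_data(pad=32, mode="sentencepiece")
    print('vocabulary size:', n_vocab)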
def entrypoint(arguments, params):
    """ Command entrypoint.

    :param arguments: Command line parsed argument as argparse.Namespace.
    :param params: Deserialized JSON configuration file provided in CLI args.
    """
    # TODO: check with output naming.
    print('entrypoint')
    print('inputs args:')
    print(arguments.inputs)
    print('audio adapter arguments:')
    print(arguments.audio_adapter)
    print('arguments config:')
    print(arguments.configuration)
    print('MWF arguments')
    print(arguments.MWF)
    print('stft backend arguments')
    print(arguments.stft_backend)
    audio_adapter = get_audio_adapter(arguments.audio_adapter)
    print(audio_adapter)
    print(audio_adapter.__dict__)
    separator = Separator(arguments.configuration,
                          MWF=arguments.MWF,
                          stft_backend=arguments.stft_backend)
    print(separator.__dict__)
    for filename in arguments.inputs:
        print('All arguments passed to separate_to_file:')
        print(filename)
        print(arguments.output_path)
        print(audio_adapter)
        print(arguments.offset)
        print(arguments.duration)
        print(arguments.codec)
        print(arguments.bitrate)
        print(arguments.filename_format)
        separator.separate_to_file(filename,
                                   arguments.output_path,
                                   audio_adapter=audio_adapter,
                                   offset=arguments.offset,
                                   duration=arguments.duration,
                                   codec=arguments.codec,
                                   bitrate=arguments.bitrate,
                                   filename_format=arguments.filename_format,
                                   synchronous=False)
    separator.join()
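# A hedged sketch of calling entrypoint directly with a hand-built namespace,
# assuming a Spleeter-style configuration string; every value below is
# illustrative, and audio_adapter=None assumes the default adapter is used.
import argparse

args = argparse.Namespace(
    inputs=['audio/example.mp3'],   # hypothetical input file
    output_path='output',
    audio_adapter=None,
    configuration='spleeter:2stems',
    MWF=False,
    stft_backend='tensorflow',
    offset=0.0,
    duration=600.0,
    codec='wav',
    bitrate='128k',
    filename_format='{filename}/{instrument}.{codec}',
)
entrypoint(args, params={})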
    # (tail of a helper that optionally casts the waveform to int16 before returning)
    if to_int:
        wav = np.array(wav, dtype='int16')
    return wav


p = pyaudio.PyAudio()
CHUNK = 2048
stream = p.open(format=pyaudio.paInt16,
                channels=1,
                rate=8000,
                frames_per_buffer=CHUNK,
                input=True,
                output=True)
infer = Separator()

while stream.is_active():
    output = []
    # np.fromstring is deprecated for binary input; np.frombuffer is the
    # supported replacement
    wav = np.frombuffer(stream.read(CHUNK, exception_on_overflow=False), np.int16)
    print(wav.shape)
    spec = stft(wav, False).T
    spec_pred = stft(wav, True).T
    for t in range(spec.shape[0]):
        pred = infer.predict(spec_pred[t])
        for i in range(129):
            # Soft mask: keep confident bins, damp uncertain ones, zero the rest
            # (the zeroing branch was truncated here; restored to match the
            # identical loop in the Filter class below)
            if pred[i] > 0.75:
                spec[t][i] *= pred[i]
            elif pred[i] > 0.5:
                spec[t][i] *= 0.1
            else:
                spec[t][i] = 0
class Filter(object):
    def __init__(self, is_filter=True):
        ## -----*----- Constructor -----*----- ##
        # Separator
        self.infer = Separator()
        self._pa = pyaudio.PyAudio()
        # Audio input settings
        self.settings = {'format': pyaudio.paInt16, 'channels': 1,
                         'rate': 8000, 'chunk': 1024}
        self.stream = self._pa.open(format=self.settings['format'],
                                    channels=self.settings['channels'],
                                    rate=self.settings['rate'],
                                    input=True,
                                    output=True,
                                    frames_per_buffer=self.settings['chunk'])
        # Waveform buffers
        self.wav = None
        self.wav_separate = None
        # Flags
        self.is_filter = is_filter
        self.is_input = True
        self.is_separate = False
        self.is_end_separate = False
        # Rolling storage for the spectrogram plots
        self.storage = {'original': np.zeros(1024 * 20),
                        'separated': np.zeros(1024 * 20)}

    def exe(self):
        ## -----*----- Run the pipeline -----*----- ##
        thread = threading.Thread(target=self.audio_input)
        thread.start()
        thread = threading.Thread(target=self.audio_separate)
        thread.start()
        self.audio_output()

    def graphplot(self):
        if not self.is_end_separate:
            return
        # Shift the rolling buffers and append the newest chunk
        self.storage['original'] = np.append(
            np.delete(self.storage['original'], range(1024)), self.wav)
        self.storage['separated'] = np.append(
            np.delete(self.storage['separated'], range(1024)), self.wav_separate)
        plt.clf()
        # Original
        plt.subplot(311)
        plt.specgram(self.storage['original'], Fs=self.settings['rate'])
        # Separated
        plt.subplot(312)
        plt.specgram(self.storage['separated'], Fs=self.settings['rate'])
        # Pause
        plt.pause(.01)

    def audio_input(self):
        ## -----*----- Audio input -----*----- ##
        while self.stream.is_active():
            # np.fromstring is deprecated; np.frombuffer reads the raw bytes
            self.wav = np.frombuffer(
                self.stream.read(self.settings['chunk'],
                                 exception_on_overflow=False), np.int16)
            self.graphplot()
            # Flip the recording flags
            self.is_input = False
            self.is_separate = True

    def audio_output(self):
        ## -----*----- Audio output -----*----- ##
        while self.stream.is_active():
            if not self.is_input:
                # Flip the recording flag back
                self.is_input = True
                # Playback (the separated waveform is a numpy array, so it
                # must be cast to int16 bytes before writing to the stream)
                if self.is_end_separate:
                    output = np.asarray(self.wav_separate, dtype=np.int16).tobytes()
                    self.stream.write(output)

    def audio_separate(self):
        ## -----*----- Source separation -----*----- ##
        while self.stream.is_active():
            if self.is_separate:
                spec = self.infer.stft(self.wav, to_log=False).T
                spec_pred = self.infer.stft(self.wav, to_log=True).T
                # Separation
                if self.is_filter:
                    for t in range(spec.shape[0]):
                        pred = self.infer.predict(spec_pred[t])
                        n = 0
                        for i in range(129):
                            # Cut the low-frequency bins
                            if n < 2:
                                spec[t][i] = 0
                                n += 1
                                continue
                            # Graded soft mask based on prediction confidence
                            if pred[i] > 0.8:
                                spec[t][i] *= 1.0
                            elif pred[i] > 0.75:
                                spec[t][i] *= pred[i]
                            elif pred[i] > 0.6:
                                spec[t][i] *= 0.005
                            elif pred[i] > 0.5:
                                spec[t][i] *= 0.002
                            else:
                                spec[t][i] *= 0.001
                self.wav_separate = self.infer.istft(spec.T)
                self.is_separate = False
                self.is_end_separate = True
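# A minimal usage sketch for the Filter class above, assuming pyaudio,
# matplotlib and the Separator model are importable; is_filter=False would
# play the input back unfiltered.
if __name__ == '__main__':
    Filter(is_filter=True).exe()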
class RealTimeAudioSeparator(object):
    def __init__(self, chunk=1024):
        """Constructor

        Keyword Arguments:
            chunk {int} -- buffer size (default: {1024})
        """
        self.chunk = chunk
        self.ac = AudioController()
        self.buffer = np.zeros((self.chunk), dtype=np.float32)
        self.window_func = np.hamming(self.chunk)
        self.osc = OscClient("localhost", 5000)
        self.separate_results = None
        self.timbers = None
        self.separate_flag = True
        self.sep_thread = None
        self.osc_thread = None

    def calc_spectrum(self, data):
        """Compute a normalized amplitude spectrum over a half-overlapping window.

        Arguments:
            data {numpy.array} -- input samples

        Returns:
            numpy.array -- spectrum
        """
        self.buffer[:self.chunk // 2] = self.buffer[self.chunk // 2:]
        self.buffer[self.chunk // 2:] = data
        F = np.fft.fft(self.buffer * self.window_func)
        amp = np.abs(F)[:self.chunk // 2]
        return amp / np.sum(amp)

    def setup(self):
        """Initial setup: build the spectral dictionary from reference wavs."""
        files = glob.glob("./audio/*")
        if len(files) == 0:
            print("Please put wav files in the audio directory")
            exit(1)
        self.separator = Separator(self.chunk // 2, 1, len(files))
        for i, f in enumerate(files):
            name = f.split("/")[-1].split('.')[0]
            data, ch, fs = self.ac.read_wav(f)
            spectrum = np.zeros((self.chunk // 2), dtype=np.float32)
            for j in range(0, len(data), self.chunk // 2):
                amp = self.calc_spectrum(data[j:j + self.chunk // 2])
                spectrum += amp
            # NOTE: j here is the offset of the last window, not the window count
            self.separator.set_dictionary(spectrum / j, i, name=name)

    def run(self):
        """Start the interactive loop."""
        print("======================= RUN =======================")
        self.ac.setup_stream(chunck=self.chunk // 2)
        with conf.ThreadPoolExecutor(max_workers=2) as executor:
            self.start_separate(executor)
            while True:
                print("> ", end="")
                line = input()
                if line == 'a':
                    self.stop_separate(executor)
                    self.add_timber()
                    self.start_separate(executor)
                elif line == 'c':
                    self.stop_separate(executor)
                    self.change_timber()
                    self.start_separate(executor)
                elif line == 'q':
                    self.stop_separate(executor)
                    break
                elif line == 's':
                    self.print_timber_list()
                elif line == 'h':
                    print('\'a\' is add timber.')
                    print('\'c\' is change timber.')
                    print('\'s\' is show timber list.')
                    print('\'q\' is shutdown application.')
        self.ac.close_stream()

    def print_countdown(self):
        """Show the countdown before recording."""
        print('please input audio {}[sec]!!!'.format(TIME))
        print('============3============')
        time.sleep(1)
        print('============2============')
        time.sleep(1)
        print('============1============')
        time.sleep(1)
        print('RECORD!!')

    def print_timber_list(self):
        """Show the currently registered timbres."""
        timbers = self.separator.get_timber()
        for k, v in timbers.items():
            print("Timber:{} : {}".format(k, v))

    def add_timber(self):
        """Record and register a new timbre."""
        self.print_countdown()
        spectrum = self.record()
        self.separator.add_dictionary(spectrum)
        print("finish add")

    def change_timber(self):
        """Replace a registered timbre."""
        timber_index = -1
        timber_name = None
        while True:
            print('Please input [timber_index,timber_name] to change. '
                  '(cancel is [q] key)')
            self.print_timber_list()
            print("> ", end="")
            line = input()
            if len(line.split(",")) != 2 and line != 'q':
                continue
            if line == 'q':
                return
            if not line.split(",")[0].isdecimal():
                continue
            timber_index, timber_name = int(line.split(",")[0]), line.split(",")[1]
            if (timber_index > len(self.separator.get_timber()) - 1) or (timber_index < 0):
                print('[error] index out of range.')
                continue
            break
        self.print_countdown()
        spectrum = self.record()
        self.separator.set_dictionary(spectrum, timber_index, name=timber_name)
        print('finish change')

    def record(self):
        """Record for TIME seconds.

        Returns:
            numpy.array -- averaged spectrum
        """
        counter = 0
        it = 0
        spectrum = np.zeros((self.chunk // 2), dtype=np.float32)
        self.ac.clear_buffer()
        self.ac.start_stream()
        while counter < TIME:
            if len(self.ac.q) > 0:
                data = self.ac.q.popleft()
                spectrum += self.calc_spectrum(data)
                counter += self.ac.chunk / self.ac.rate
                it += 1
        self.ac.stop_stream()
        return spectrum / it

    def start_separate(self, executor):
        """Start the worker threads.

        Arguments:
            executor {conf.ThreadPoolExecutor} -- executor instance
        """
        self.timbers = self.separator.get_timber()
        if "noise" not in list(self.timbers.values()):
            self.timbers[len(self.timbers)] = "noise"
        msg = [len(self.timbers)]
        msg.extend([v for v in self.timbers.values()])
        self.osc.send(OSC_TIMBER_ADDR, msg)
        self.ac.clear_buffer()
        self.ac.start_stream()
        self.separate_flag = True
        self.sep_thread = executor.submit(self.separate_sound)
        self.osc_thread = executor.submit(self.send_result)

    def stop_separate(self, executor):
        """Stop the worker threads.

        Arguments:
            executor {conf.ThreadPoolExecutor} -- executor instance
        """
        self.separate_flag = False
        self.sep_thread.result()
        self.sep_thread = None
        self.osc_thread.result()
        self.osc_thread = None
        self.ac.clear_buffer()
        self.ac.stop_stream()

    def separate_sound(self):
        """Run source separation on queued chunks."""
        while self.separate_flag:
            if len(self.ac.q) > 0:
                data = self.ac.q.popleft()
                spectrum = self.calc_spectrum(data)
                self.separate_results = self.separator.separate(spectrum)

    def send_result(self):
        """Send separation results over OSC."""
        while self.separate_flag:
            response = []
            if self.separate_results is None:
                continue
            if self.separate_results.shape[0] != len(self.timbers):
                continue
            for i, _ in enumerate(self.timbers.values()):
                response.append(self.separate_results[i][0])
            self.osc.send(OSC_RESULT_ADDR, response)
            time.sleep(OSC_FREQ)
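# A minimal usage sketch, assuming AudioController, OscClient and the
# dictionary-based Separator are importable and ./audio/ holds one reference
# wav per timbre.
if __name__ == '__main__':
    app = RealTimeAudioSeparator(chunk=1024)
    app.setup()   # build the spectral dictionary from ./audio/*
    app.run()     # interactive loop: a=add, c=change, s=show, q=quit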
class Filter(object):
    def __init__(self, is_filter=True):
        ## -----*----- Constructor -----*----- ##
        # Separator
        self.infer = Separator()
        self._pa = pyaudio.PyAudio()
        # Audio input settings
        self.settings = {'format': pyaudio.paInt16, 'channels': 1,
                         'rate': 8000, 'chunk': 1024}
        self.stream = self._pa.open(format=self.settings['format'],
                                    channels=self.settings['channels'],
                                    rate=self.settings['rate'],
                                    input=True,
                                    output=True,
                                    frames_per_buffer=self.settings['chunk'])
        # Waveform buffers
        self.wav = None
        self.wav_separate = None
        # Flags
        self.is_filter = is_filter
        self.is_input = True
        self.is_separate = False
        self.is_end_separate = False

    def exe(self):
        ## -----*----- Run the pipeline -----*----- ##
        thread = threading.Thread(target=self.audio_input)
        thread.start()
        thread = threading.Thread(target=self.audio_separate)
        thread.start()
        self.audio_output()

    def audio_input(self):
        ## -----*----- Audio input -----*----- ##
        while self.stream.is_active():
            # np.fromstring is deprecated; np.frombuffer reads the raw bytes
            self.wav = np.frombuffer(
                self.stream.read(self.settings['chunk'],
                                 exception_on_overflow=False), np.int16)
            # Flip the recording flags
            self.is_input = False
            self.is_separate = True

    def audio_output(self):
        ## -----*----- Audio output -----*----- ##
        while self.stream.is_active():
            if not self.is_input:
                # Flip the recording flag back
                self.is_input = True
                # Playback (cast the numpy waveform to int16 bytes first)
                if self.is_end_separate:
                    output = np.asarray(self.wav_separate, dtype=np.int16).tobytes()
                    self.stream.write(output)

    def audio_separate(self):
        ## -----*----- Source separation -----*----- ##
        while self.stream.is_active():
            if self.is_separate:
                spec = self.infer.stft(self.wav, to_log=False).T
                spec_pred = self.infer.stft(self.wav, to_log=True).T
                # Separation
                if self.is_filter:
                    for t in range(spec.shape[0]):
                        pred = self.infer.predict(spec_pred[t])
                        for i in range(129):
                            # Soft mask: keep confident bins, damp the rest
                            if pred[i] > 0.75:
                                spec[t][i] *= pred[i]
                            elif pred[i] > 0.5:
                                spec[t][i] *= 0.1
                            else:
                                spec[t][i] = 0
                self.wav_separate = self.infer.istft(spec.T)
                self.is_separate = False
                self.is_end_separate = True
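# A hedged sketch of the same thresholded soft mask written as vectorized
# numpy, applied to a whole (time, 129) prediction array at once; spec and
# pred below are dummy stand-ins for the arrays produced in audio_separate.
import numpy as np

def apply_soft_mask(spec, pred):
    # Gain = pred where confident, 0.1 where uncertain, 0 elsewhere
    gain = np.where(pred > 0.75, pred, np.where(pred > 0.5, 0.1, 0.0))
    return spec * gain

spec = np.abs(np.random.randn(8, 129))   # dummy magnitude spectrogram
pred = np.random.rand(8, 129)            # dummy per-bin confidences
masked = apply_soft_mask(spec, pred)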
def add_separator_horizontal(self, position):
    separator = Separator()
    separator.position = position
    separator.direction = HORIZONTAL
    self.separators.append(separator)
    self.handler.control.append(separator.control)
# -*- coding: utf-8 -*-
from separator import Separator

if __name__ == '__main__':
    Separator(True)
def setUp(self):
    self.x = Separator()
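# A minimal sketch of the unittest fixture this setUp belongs to, assuming
# Separator() takes no required arguments; the test name and assertion are
# illustrative, not from the original suite.
import unittest

class TestSeparator(unittest.TestCase):
    def setUp(self):
        self.x = Separator()

    def test_instance_created(self):
        self.assertIsNotNone(self.x)

if __name__ == '__main__':
    unittest.main()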
class MelBank(Recording):
    def __init__(self):
        ## -----*----- Constructor -----*----- ##
        super().__init__()
        # Separator
        self.infer = Separator()
        # Differs from the parent class stream (Float32, 4x the rate)
        self.f_stream = self._pa.open(
            format=pyaudio.paFloat32,
            channels=self.settings['channels'],
            rate=self.settings['rate'] * 4,
            input=True,
            output=False,
            frames_per_buffer=self.settings['chunk']
        )
        # Rising/falling edge counters
        self.cnt_edge = {'up': 0, 'down': 0}
        # Volume, threshold and other running state
        self.state = {'amp': 0, 'total': 0, 'cnt': 0, 'border': 9999, 'average': 0}
        # Console output
        self.console = Console('./config/outer.txt')
        self.color_record = 90
        self.color_separate = 90
        self.is_separate = False

    def start(self):
        ## -----*----- Start detection -----*----- ##
        time.sleep(self.settings['past_second'])
        # Launch the sub-thread that keeps the threshold updated
        thread = threading.Thread(target=self.update_border)
        thread.start()
        self.pastTime = time.time()

        while not self.is_exit:
            try:
                if time.time() - self.pastTime > 0.5:
                    self.reset_state()
                self.state['cnt'] += 1
                self.detection()
                sys.stdout.flush()
            except KeyboardInterrupt:
                os.system('clear')
                self.is_exit = True

    def detection(self):
        ## -----*----- Rising/falling edge detection -----*----- ##
        # np.fromstring is deprecated; frombuffer returns a read-only view,
        # so the window is applied out of place
        voiceData = np.frombuffer(
            self.f_stream.read(self.settings['chunk'], exception_on_overflow=False),
            np.float32)
        voiceData = voiceData * np.hanning(self.settings['chunk'])

        # Amplitude spectrum (0-8000 Hz)
        x = np.fft.fft(voiceData)
        # Power spectrum
        amplitudeSpectrum = [np.sqrt(c.real ** 2 + c.imag ** 2) for c in x]
        # Band-pass filter (100-5000 Hz)
        amplitudeSpectrum = amplitudeSpectrum[
            int((self.settings['chunk'] / (self.settings['rate'] * 2)) * 100):
            int((self.settings['chunk'] / (self.settings['rate'] * 2)) * 5000)]

        # Amplitude and running average
        self.state['amp'] = sum(amplitudeSpectrum)
        self.state['total'] += self.state['amp']
        self.state['average'] = self.state['total'] / self.state['cnt']

        # Console output
        self.console.draw(int(self.state['average']), int(self.state['amp']),
                          int(self.state['border']),
                          '\033[{0}mRECORDING\033[0m'.format(self.color_record),
                          '\033[{0}mSEPARATING\033[0m'.format(self.color_separate),
                          *self.meter())

        if self.is_separate and self.record_end.is_set():
            thread = threading.Thread(target=self.separator)
            thread.start()
            self.is_separate = False
        else:
            # Rising edge detection
            if self.up_edge() and self.record_end.is_set() and self.color_separate == 90:
                self.record_start.set()
                self.record_end.clear()
                self.state['border'] = self.state['average']
                self.color_record = 32
            # Falling edge detection
            if self.down_edge() and (not self.record_end.is_set()):
                self.record_start.clear()
                self.reset_state()
                self.color_record = 90
                self.is_separate = True
                self.color_separate = 32

    def up_edge(self):
        ## -----*----- Rising edge detection -----*----- ##
        if not self.record_start.is_set():
            if self.state['amp'] >= self.state['border']:
                self.cnt_edge['up'] += 1
            if self.cnt_edge['up'] > 5:
                return True
        return False

    def down_edge(self):
        ## -----*----- Falling edge detection -----*----- ##
        if self.record_start.is_set():
            if self.state['average'] <= self.state['border']:
                self.cnt_edge['down'] += 1
            if self.cnt_edge['down'] > 10:
                self.cnt_edge['up'] = self.cnt_edge['down'] = 0
                return True
        return False

    def reset_state(self):
        ## -----*----- Reset state -----*----- ##
        self.state['total'] = self.state['average'] * 15
        self.state['cnt'] = 15
        if self.state['average'] >= self.state['amp']:
            self.cnt_edge['up'] = 0
        self.pastTime = time.time()

    def update_border(self):
        ## -----*----- Update the threshold -----*----- ##
        offset = range(50, 201, 10)
        while not self.is_exit:
            time.sleep(0.2)
            if self.cnt_edge['up'] < 3 and not self.record_start.is_set():
                if int(self.state['average'] / 20) > len(offset) - 1:
                    i = len(offset) - 1
                else:
                    i = int(self.state['average'] / 20)
                # NOTE: offset and i are computed but unused in the line below
                self.state['border'] = pow(10, 1.13) * pow(self.state['average'], 0.72)

    def meter(self):
        ## -----*----- Build the volume meters -----*----- ##
        meter = [''] * 3
        keys = ['average', 'amp', 'border']
        for i in range(3):
            for j in range(int(self.state[keys[i]] / 20 + 3)):
                meter[i] += '■'
        if self.record_start.is_set():
            if self.state['average'] >= self.state['border']:
                meter[0] = '\033[94m' + meter[0] + '\033[0m'
            elif self.state['amp'] >= self.state['border']:
                meter[1] = '\033[94m' + meter[1] + '\033[0m'
        return meter

    def separator(self):
        ## -----*----- Music separation -----*----- ##
        self.infer.separate(self.file)
        self.player('./tmp/separate.wav')
        self.color_separate = 90
        return

    def player(self, path):
        ## -----*----- Music playback -----*----- ##
        pa = pyaudio.PyAudio()
        wavFile = wave.open(path, 'rb')
        stream = pa.open(
            format=pa.get_format_from_width(wavFile.getsampwidth()),
            channels=wavFile.getnchannels(),
            rate=wavFile.getframerate(),
            output=True,
        )
        voiceData = wavFile.readframes(self.settings['chunk'])
        while len(voiceData) > 0:
            stream.write(voiceData)
            voiceData = wavFile.readframes(self.settings['chunk'])
        stream.stop_stream()
        stream.close()
        pa.terminate()
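# A minimal usage sketch, assuming the Recording parent class provides _pa,
# settings, the record_start/record_end events, is_exit and file; the config
# path used by Console above must also exist.
if __name__ == '__main__':
    bank = MelBank()
    bank.start()   # blocks: detects onsets, records, then separates and plays back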
def add_separator_vertical(self, position):
    separator = Separator()
    separator.position = position
    separator.direction = VERTICAL
    self.separators.append(separator)
    self.handler.control.append(separator.control)
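# A hedged usage sketch for the add_separator_* helpers, assuming they live on
# a layout-style class with `separators` and `handler` attributes and that
# HORIZONTAL/VERTICAL are module-level direction constants; `layout` and the
# position values are illustrative.
layout.add_separator_horizontal(position=0.5)   # split the view top/bottom
layout.add_separator_vertical(position=0.33)    # split the view left/right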
stream = pa.open(
    format=pa.get_format_from_width(wavFile.getsampwidth()),
    channels=wavFile.getnchannels(),
    rate=wavFile.getframerate(),
    output=True,
)
voiceData = wavFile.readframes(1024)
while len(voiceData) > 0:
    stream.write(voiceData)
    voiceData = wavFile.readframes(1024)
stream.stop_stream()
stream.close()
pa.terminate()

infer = Separator()
record = Recording()

os.system('clear')
print('*** Press ENTER to start/stop recording ***')
mode = 0  # 0: start recording, 1: stop recording
cnt = 1

while True:
    key = input()
    if mode == 0:
        # Start recording
        print("===== {0} START ===============".format(cnt))
        record.record_start.set()