def __init__(self, channels=8, ns=False, agc=0):
    """Initialize the DOA element.

    Args:
        channels: number of interleaved input channels in each frame.
        ns: enable noise suppression on the mono stream fed to the AP.
        agc: AGC type forwarded to the webrtc AP.
    """
    super(DOA, self).__init__()
    self.channels = channels
    # six microphone channels, grouped into three opposite pairs
    self.mask = [0, 1, 2, 3, 4, 5]
    self.pair = [[0, 3], [1, 4], [2, 5]]
    # 160 samples of 16-bit audio per channel per frame
    self.frame_size = 160
    self.frame_bytes = self.frame_size * self.channels * 2
    self.ap = AP(enable_ns=ns, agc_type=agc)
    self.ap.set_stream_format(16000, 1)
    self.queue = queue.Queue()
    self.done = False
    self.collections = collections.deque(maxlen=16)
    # hanning window applied before the stft
    self.window = np.hanning(self.frame_size)
    # stft length: next power of two at or above 2 * frame_size
    self.nfft = 1 << (self.frame_size * 2 - 1).bit_length()
    print('fft size: {}'.format(self.nfft))
    # largest possible inter-mic delay, in samples
    # (presumably 0.064 m mic spacing over ~340 m/s sound speed — confirm)
    self.margin_f = 0.064 * 16000 / 340.0
    self.interp = 2
    self.margin = int(self.margin_f * self.interp)
    # running cross-correlation baseline, one entry per mic pair
    self.cc_baseline = [0] * len(self.pair)
def __init__(self, rate=16000, channels=1):
    """Set up the NS element: stream parameters, a byte buffer, and a
    webrtc AP configured for noise suppression."""
    super(NS, self).__init__()
    self.rate = rate
    self.channels = channels
    # bytes in 10 ms of 16-bit audio: (rate * channels * 2) / 100
    self._bytes_10ms = int(rate * channels * 2 / 100)
    self._buf = b''
    ap = AP(enable_ns=True)
    ap.set_stream_format(rate, channels)
    self.ap = ap
class NS(Element):
    """Noise-suppression element backed by the webrtc audio processor.

    Incoming audio is buffered and handed to the AP in exact 10 ms
    frames, because webrtc audio processing only supports processing
    10 ms of audio per call; passing arbitrary chunk sizes straight
    through (as the previous version did) is not supported.
    """

    def __init__(self, rate=16000, channels=1):
        super(NS, self).__init__()
        self.rate = rate
        self.channels = channels
        self._buf = b''
        # bytes in 10 ms of 16-bit audio
        self._bytes_10ms = int(rate * channels * 2 / 100)
        self.ap = AP(enable_ns=True)
        self.ap.set_stream_format(rate, channels)

    def put(self, data):
        """Buffer *data* and forward every complete 10 ms frame,
        noise-suppressed, to the next element."""
        self._buf += data
        while len(self._buf) >= self._bytes_10ms:
            frame = self._buf[:self._bytes_10ms]
            self._buf = self._buf[self._bytes_10ms:]
            super(NS, self).put(self.ap.process_stream(frame))
def test_ns():
    """Smoke-test noise suppression over a run of silent frames."""
    ap = AP(enable_ns=True)
    ap.set_ns_level(1)
    ap.set_stream_format(16000, 1)
    # FIX: the chunk must be bytes (process_stream consumes raw PCM), and
    # webrtc audio processing only supports 10 ms per call: at 16 kHz mono
    # 16-bit that is 160 samples = 320 bytes (was a 100 ms str chunk).
    chunk = b'\0\0' * 160
    for _ in range(16):
        out = ap.process_stream(chunk)
        # processed frame should be the same size as the input frame
        assert len(out) == len(chunk)
class NS(Element):
    """Pipeline element that runs webrtc noise suppression.

    Audio is accumulated in an internal byte buffer and drained in
    10 ms frames, the only frame size the webrtc audio processor
    accepts per call.
    """

    def __init__(self, rate=16000, channels=1):
        super(NS, self).__init__()
        self.rate = rate
        self.channels = channels
        self._buf = b''
        # bytes per 10 ms of 16-bit audio
        self._bytes_10ms = int(rate * channels * 2 / 100)
        ap = AP(enable_ns=True)
        ap.set_stream_format(rate, channels)
        self.ap = ap

    def put(self, data):
        """Buffer *data*; suppress and forward each complete 10 ms frame."""
        self._buf += data
        step = self._bytes_10ms
        while len(self._buf) >= step:
            # webrtc audio processing only supports 10 ms of audio each time
            frame, self._buf = self._buf[:step], self._buf[step:]
            super(NS, self).put(self.ap.process_stream(frame))
def __init__(self, rate: int, channels: int, level: int = 0):
    """Create a noise-suppression element.

    Args:
        rate (int): Audio sample rate, in Hz.
        channels (int): Number of audio channels.
        level (int): Aggressiveness level of the noise-suppression
            algorithm.
    """
    self.__rate = rate
    self.__channels = channels
    # samples per channel in one 10 ms frame
    self.__frames_per_channel = int(rate * 0.01)
    ap = AP(enable_ns=True)
    ap.set_ns_level(level)
    ap.set_stream_format(rate, channels)
    self.__ap = ap
def reduce_noise_and_write_output(input_path: str, ns: int, output_path=None):
    """Run webrtc noise suppression over a wav file and write the result.

    Args:
        input_path: path of the source wav file.
        ns: noise-suppression aggressiveness level (passed to set_ns_level).
        output_path: optional destination. If it is an existing directory,
            the default '<name>_ns<level><ext>' file is placed inside it;
            if it is a file path, it is used as-is. When omitted, the
            default name is written to the current working directory.
    """
    # FIX: the previous version split off a `directory` component that was
    # never used; only the basename matters for the default output name.
    filename_no_ext, ext = os.path.splitext(os.path.basename(input_path))
    output_filepath = f'{filename_no_ext}_ns{ns}{ext}'
    if output_path:
        if os.path.isdir(output_path):
            output_filepath = os.path.join(output_path, output_filepath)
        else:
            output_filepath = output_path
    print(f'Processing {input_path} to {output_filepath}')
    with wave.open(input_path, 'rb') as wav, wave.open(output_filepath, 'wb') as out:
        rate = wav.getframerate()
        width = wav.getsampwidth()
        channels = wav.getnchannels()
        out.setnchannels(channels)
        out.setsampwidth(width)
        out.setframerate(rate)
        ap = AP(enable_ns=True)
        ap.set_ns_level(ns)
        ap.set_stream_format(rate, channels, rate, channels)
        frames_size = int(rate * 10 / 1000)  # only support processing 10ms audio each time
        frames_bytes = frames_size * width * channels
        while True:
            data = wav.readframes(frames_size)
            if len(data) != frames_bytes:
                # trailing partial frame is dropped; AP needs exactly 10 ms
                break
            out.writeframes(ap.process_stream(data))
def __init__(self, rate=16000, channels=1):
    """Initialize the parent element and a webrtc AP with noise
    suppression enabled for the given stream format."""
    super(NS, self).__init__()
    ap = AP(enable_ns=True)
    ap.set_stream_format(rate, channels)
    self.ap = ap
class DOA(Element):
    """Direction-of-arrival estimator for a circular 6-mic array.

    Consumes interleaved multi-channel 16-bit frames, estimates the sound
    direction with PHAT-weighted generalized cross-correlation over three
    opposite microphone pairs, drives the pixel ring, and forwards the
    AP-processed mono channel downstream.

    Fixes applied in this revision:
      * set_callback built a ValueError but never raised it;
      * run() initialized its buffer as str although the queue holds bytes;
      * np.fromstring / ndarray.tostring are removed/deprecated in modern
        numpy — replaced with np.frombuffer / ndarray.tobytes.
    """

    def __init__(self, channels=8, ns=False, agc=0):
        super(DOA, self).__init__()
        self.channels = channels
        self.mask = [0, 1, 2, 3, 4, 5]
        self.pair = [[0, 3], [1, 4], [2, 5]]
        self.frame_size = 160
        self.frame_bytes = self.frame_size * self.channels * 2
        self.ap = AP(enable_ns=ns, agc_type=agc)
        self.ap.set_stream_format(16000, 1)
        self.queue = queue.Queue()
        self.done = False
        self.collections = collections.deque(maxlen=16)
        # prepare hanning window for stft
        self.window = np.hanning(self.frame_size)
        # length of stft: next power of two >= 2 * frame_size
        self.nfft = 1 << (self.frame_size * 2 - 1).bit_length()
        print('fft size: {}'.format(self.nfft))
        # max inter-mic delay in samples
        # (presumably 0.064 m spacing / ~340 m/s sound speed — confirm)
        self.margin_f = 0.064 * 16000 / 340.0
        self.interp = 2
        self.margin = int(self.margin_f * self.interp)
        self.cc_baseline = [0] * len(self.pair)

    def put(self, data):
        """Queue raw interleaved audio for the worker thread."""
        self.queue.put(data)

    def start(self):
        """Start the background processing thread."""
        self.done = False
        thread = threading.Thread(target=self.run)
        thread.daemon = True
        thread.start()

    def stop(self):
        """Turn the pixel ring off and signal the worker to exit."""
        pixel_ring.off()
        self.done = True

    def run(self):
        has_voice = 0
        # FIX: queued audio is bytes; the buffer must be bytes as well
        buffer = b''
        count = 0
        pixel_ring_countdown = 0
        while not self.done:
            data = self.queue.get()
            buffer += data
            while len(buffer) >= self.frame_bytes:
                data = buffer[:self.frame_bytes]
                buffer = buffer[self.frame_bytes:]
                # FIX: np.fromstring was removed from numpy; frombuffer is
                # the byte-identical replacement for raw PCM decoding
                data = np.frombuffer(data, dtype='int16')
                # FIX: tostring() is deprecated; tobytes() is equivalent
                mono = data[0::self.channels].tobytes()
                mono = self.ap.process_stream(mono)
                has_voice = self.ap.has_voice()
                offset, direction = self._process(data)
                self.collections.append([direction, offset, has_voice])
                count += 1
                if count >= self.collections.maxlen:
                    direction = self.get_direction()
                    if direction:
                        print('@ {}'.format(direction))
                        pixel_ring.wakeup(direction)
                        pixel_ring_countdown = 10
                    else:
                        if pixel_ring_countdown > 0:
                            pixel_ring_countdown -= 1
                            if pixel_ring_countdown == 0:
                                pixel_ring.off()
                    count = 0
                super(DOA, self).put(mono)

    def set_callback(self, callback):
        """Register a callable; raise if the argument is not callable."""
        if callable(callback):
            self.on_detected = callback
        else:
            # FIX: the exception object was created but never raised
            raise ValueError('The callback parameter is not callable')

    def get_direction(self):
        """Vote over the recent window; return a direction in degrees
        (multiples of 30) when enough voiced frames agree, else None."""
        counting = [0] * 12
        voice = 0
        for d in self.collections:
            # NOTE(review): only voiced frames are tallied — confirm the
            # intended grouping, the original indentation was ambiguous
            if d[2]:
                voice += 1
                counting[d[0]] += 1
        direction_index = np.argmax(counting)
        self.direction = direction_index * 30
        if voice >= self.collections.maxlen / 2 and counting[
                direction_index] >= self.collections.maxlen / 3:
            return self.direction

    def _process(self, data):
        """GCC-PHAT over the mic pairs; return (per-pair offsets, the
        quantized direction index in [0, 12))."""
        X = [0] * self.channels
        for channel in self.mask:
            x = data[channel::self.channels]
            # add window
            if self.window is not None:
                x = x * self.window
            X[channel] = np.fft.rfft(x, self.nfft)
        offset = [0] * len(self.pair)
        for i, v in enumerate(self.pair):
            CC = X[v[1]] * np.conj(X[v[0]])
            # generalized cross-correlation with PHAT weighting
            CC /= np.abs(CC) + eps
            cc = np.fft.irfft(CC, n=self.nfft * self.interp)
            # keep only lags within the physically possible delay range
            cc = np.concatenate((cc[-self.margin:], cc[:self.margin + 1]))
            cc = np.abs(cc)
            cc = cc - self.cc_baseline[i]
            # find max cross correlation index
            offset_max = np.argmax(cc) - self.margin
            offset[i] = (offset_max) / float(self.interp)
            # update baseline (slow exponential tracking of the floor)
            self.cc_baseline[i] = self.cc_baseline[i] + 0.01 * cc
        # use the pair closest to broadside: arcsin is most accurate there
        min_index = np.argmin(np.abs(offset[:3]))
        theta = np.arcsin(offset[min_index] / self.margin_f) * 180 / np.pi
        if (min_index != 0 and offset[min_index - 1] < 0) or (
                min_index == 0 and offset[2] >= 0):
            best_guess = (360 - theta) % 360
        else:
            best_guess = (180 + theta)
        best_guess = (best_guess + 30 + min_index * 60) % 360
        direction = int((best_guess + 15) // 30 % 12)
        return offset, direction
# Command-line noise-suppression pass: read a wav file, run it through the
# webrtc AP in 10 ms frames, and write the result to a second wav file.
if len(sys.argv) < 3:
    print('Usage: {} audio.wav out.wav'.format(sys.argv[0]))
    sys.exit(1)

# source wav: copy its format parameters to the output
wav = wave.open(sys.argv[1], 'rb')
rate = wav.getframerate()
width = wav.getsampwidth()
channels = wav.getnchannels()
out = wave.open(sys.argv[2], 'wb')
out.setnchannels(channels)
out.setsampwidth(width)
out.setframerate(rate)

ap = AP(enable_ns=True)
# set input/output stream format
ap.set_stream_format(rate, channels, rate, channels)

frames_size = int(rate * 10 / 1000)  # only support processing 10ms audio each time
frames_bytes = frames_size * width * channels
while True:
    data = wav.readframes(frames_size)
    # a short read means end-of-file (trailing partial frame is dropped)
    if len(data) != frames_bytes:
        break
    # NOTE(review): the loop body appears truncated here — the read frame is
    # never passed to ap.process_stream nor written via out.writeframes, and
    # neither wav nor out is closed; confirm against the full original script.
# Command-line AGC pass: read a wav file, run it through the webrtc AP with
# automatic gain control enabled, and write the result to a second wav file.
if len(sys.argv) < 3:
    print('Usage: {} audio.wav out.wav'.format(sys.argv[0]))
    sys.exit(1)

# source wav: copy its format parameters to the output
wav = wave.open(sys.argv[1], 'rb')
rate = wav.getframerate()
width = wav.getsampwidth()
channels = wav.getnchannels()
out = wave.open(sys.argv[2], 'wb')
out.setnchannels(channels)
out.setsampwidth(width)
out.setframerate(rate)

ap = AP(agc_type=1)
# set input/output stream format
ap.set_stream_format(rate, channels, rate, channels)

frames_size = int(rate * 10 / 1000)
frames_bytes = frames_size * width * channels
# target level in dBFS (negative value, as passed to the webrtc AGC)
ap.set_agc_target(-20)
while True:
    data = wav.readframes(frames_size)
    # a short read means end-of-file (trailing partial frame is dropped)
    if len(data) != frames_bytes:
        break
    # NOTE(review): the loop body appears truncated here — the read frame is
    # never passed to ap.process_stream nor written via out.writeframes, and
    # neither wav nor out is closed; confirm against the full original script.