Example no. 1
0
    def __init__(self, channels=8, ns=False, agc=0):
        """Initialize the direction-of-arrival estimator.

        Args:
            channels (int): number of interleaved channels in incoming audio.
            ns (bool): enable WebRTC noise suppression on the mono stream.
            agc (int): automatic-gain-control type forwarded to ``AP``.
        """
        super(DOA, self).__init__()

        self.channels = channels
        # microphone channels used for DOA estimation
        self.mask = [0, 1, 2, 3, 4, 5]
        # opposite microphone pairs that get cross-correlated
        self.pair = [[0, 3], [1, 4], [2, 5]]

        # 160 frames = 10 ms at 16 kHz; 2 bytes per 16-bit sample
        self.frame_size = 160
        self.frame_bytes = self.frame_size * self.channels * 2

        self.ap = AP(enable_ns=ns, agc_type=agc)
        self.ap.set_stream_format(16000, 1)

        self.queue = queue.Queue()
        self.done = False

        # rolling window of recent [direction, offset, has_voice] results
        self.collections = collections.deque(maxlen=16)

        # prepare hanning window for stft
        self.window = np.hanning(self.frame_size)
        # self.window = None

        # length of stft: next power of two >= 2 * frame_size (512 here)
        self.nfft = 1 << (self.frame_size * 2 - 1).bit_length()
        print('fft size: {}'.format(self.nfft))
        # self.nfft = 512

        # presumably mic spacing (0.064 m) * sample rate / speed of sound,
        # i.e. the maximum inter-mic delay in samples -- TODO confirm
        self.margin_f = 0.064 * 16000 / 340.0

        # cross-correlation oversampling factor
        self.interp = 2
        self.margin = int(self.margin_f * self.interp)

        # per-pair running baseline subtracted from the cross-correlation
        self.cc_baseline = [0] * len(self.pair)
Example no. 2
0
    def __init__(self, rate=16000, channels=1):
        """Create a noise-suppression element that streams 10 ms chunks.

        Args:
            rate (int): sample rate in Hz.
            channels (int): number of audio channels.
        """
        super(NS, self).__init__()

        self.rate, self.channels = rate, channels
        # leftover bytes that did not yet fill a whole 10 ms chunk
        self._buf = b''
        # 10 ms of 16-bit audio: (rate / 100) frames * channels * 2 bytes
        self._bytes_10ms = int(rate * channels * 2 / 100)

        processor = AP(enable_ns=True)
        processor.set_stream_format(rate, channels)
        self.ap = processor
Example no. 3
0
class NS(Element):
    """Element that runs WebRTC noise suppression on audio passed through it."""

    def __init__(self, rate=16000, channels=1):
        """Configure the underlying audio processor for `rate`/`channels`."""
        super(NS, self).__init__()

        processor = AP(enable_ns=True)
        processor.set_stream_format(rate, channels)
        self.ap = processor

    def put(self, data):
        """Suppress noise in `data`, then forward it downstream."""
        denoised = self.ap.process_stream(data)
        super(NS, self).put(denoised)
def test_ns():
    """Smoke-test noise suppression on 16 kHz mono silence.

    Fixes vs. the original:
    - ``process_stream`` consumes raw audio bytes; the original passed a
      ``str`` of NULs, which fails on Python 3 (the other examples in this
      file buffer ``b''``).
    - WebRTC audio processing only accepts 10 ms of audio per call (see the
      buffered ``NS`` element), so each chunk is 160 frames, not 1600.
    """
    ap = AP(enable_ns=True)
    ap.set_ns_level(1)
    ap.set_stream_format(16000, 1)

    # 160 frames of 16-bit silence = 10 ms at 16 kHz
    chunk = b'\0\0' * 160
    # same total amount of audio as the original 16 x 100 ms
    for _ in range(160):
        out = ap.process_stream(chunk)
Example no. 5
0
class NS(Element):
    """Noise-suppression element that feeds audio to WebRTC in 10 ms chunks."""

    def __init__(self, rate=16000, channels=1):
        """Set up buffering and the audio processor for `rate`/`channels`."""
        super(NS, self).__init__()

        self.rate = rate
        self.channels = channels
        # bytes still waiting for a complete 10 ms chunk
        self._buf = b''
        # 10 ms of 16-bit audio: (rate / 100) frames * channels * 2 bytes
        self._bytes_10ms = int(rate * channels * 2 / 100)

        processor = AP(enable_ns=True)
        processor.set_stream_format(rate, channels)
        self.ap = processor

    def put(self, data):
        """Buffer `data` and forward denoised audio one 10 ms chunk at a time."""
        self._buf += data
        size = self._bytes_10ms
        # webrtc audio processing only supports processing 10 ms per call
        while len(self._buf) >= size:
            chunk, self._buf = self._buf[:size], self._buf[size:]
            super(NS, self).put(self.ap.process_stream(chunk))
    def __init__(self, rate: int, channels: int, level: int = 0):
        """Configure a noise-suppression element.

        Args:
            rate (int): Audio sample rate in Hz.
            channels (int): Channel count.
            level (int): Aggressiveness level of the noise suppression
                algorithm.
        """
        self.__rate = rate
        self.__channels = channels
        # WebRTC consumes 10 ms chunks: rate * 0.01 frames per channel
        self.__frames_per_channel = int(rate * 0.01)

        processor = AP(enable_ns=True)
        processor.set_ns_level(level)
        processor.set_stream_format(rate, channels)
        self.__ap = processor
Example no. 7
0
File: ns.py Project: tracek/denoise
def reduce_noise_and_write_output(input_path: str, ns: int, output_path=None):
    """Run WebRTC noise suppression over a WAV file and write the result.

    Args:
        input_path (str): Path to the input WAV file.
        ns (int): Noise-suppression aggressiveness level.
        output_path: Optional output file path, or an existing directory in
            which to place the generated '<stem>_ns<level><ext>' file. When
            omitted, the file is written to the current working directory
            (NOTE(review): not next to the input file -- confirm intended).

    Improvement vs. the original: drops the unused ``directory`` local that
    ``os.path.split`` produced (its value was never used).
    """
    filename_no_ext, ext = os.path.splitext(os.path.basename(input_path))
    output_filepath = f'{filename_no_ext}_ns{ns}{ext}'

    if output_path:
        if os.path.isdir(output_path):
            output_filepath = os.path.join(output_path, output_filepath)
        else:
            output_filepath = output_path

    print(f'Processing {input_path} to {output_filepath}')

    with wave.open(input_path, 'rb') as wav, wave.open(output_filepath,
                                                       'wb') as out:
        rate = wav.getframerate()
        width = wav.getsampwidth()
        channels = wav.getnchannels()

        # mirror the input stream's format on the output file
        out.setnchannels(channels)
        out.setsampwidth(width)
        out.setframerate(rate)

        ap = AP(enable_ns=True)
        ap.set_ns_level(ns)
        ap.set_stream_format(rate, channels, rate, channels)

        # WebRTC audio processing only supports 10 ms of audio per call
        frames_size = int(rate * 10 / 1000)
        frames_bytes = frames_size * width * channels

        while True:
            data = wav.readframes(frames_size)
            if len(data) != frames_bytes:
                # short read: trailing partial chunk or end of file
                break
            data_out = ap.process_stream(data)
            out.writeframes(data_out)
Example no. 8
0
    def __init__(self, rate=16000, channels=1):
        """Set up the WebRTC audio processor for noise suppression."""
        super(NS, self).__init__()

        processor = AP(enable_ns=True)
        processor.set_stream_format(rate, channels)
        self.ap = processor
Example no. 9
0
class DOA(Element):
    """Direction-of-arrival estimator for a 6-microphone array.

    Consumes interleaved 16-bit multi-channel audio, estimates the sound
    direction with phase-transform cross-correlation over three opposite
    microphone pairs, and forwards the processed mono stream downstream.

    Fixes vs. the original: bytes buffer in ``run`` (was ``''``, which fails
    on Python 3), ``np.frombuffer``/``tobytes`` for the deprecated
    ``fromstring``/``tostring``, and ``set_callback`` now actually raises
    its ``ValueError``.
    """

    def __init__(self, channels=8, ns=False, agc=0):
        """Initialize the estimator.

        Args:
            channels (int): number of interleaved channels in incoming audio.
            ns (bool): enable WebRTC noise suppression on the mono stream.
            agc (int): automatic-gain-control type forwarded to ``AP``.
        """
        super(DOA, self).__init__()

        self.channels = channels
        # microphone channels used for DOA estimation
        self.mask = [0, 1, 2, 3, 4, 5]
        # opposite microphone pairs that get cross-correlated
        self.pair = [[0, 3], [1, 4], [2, 5]]

        # 160 frames = 10 ms at 16 kHz; 2 bytes per 16-bit sample
        self.frame_size = 160
        self.frame_bytes = self.frame_size * self.channels * 2

        self.ap = AP(enable_ns=ns, agc_type=agc)
        self.ap.set_stream_format(16000, 1)

        self.queue = queue.Queue()
        self.done = False

        # rolling window of recent [direction, offset, has_voice] results
        self.collections = collections.deque(maxlen=16)

        # prepare hanning window for stft
        self.window = np.hanning(self.frame_size)

        # length of stft: next power of two >= 2 * frame_size (512 here)
        self.nfft = 1 << (self.frame_size * 2 - 1).bit_length()
        print('fft size: {}'.format(self.nfft))

        # presumably mic spacing (0.064 m) * sample rate / speed of sound,
        # i.e. the maximum inter-mic delay in samples -- TODO confirm
        self.margin_f = 0.064 * 16000 / 340.0

        # cross-correlation oversampling factor
        self.interp = 2
        self.margin = int(self.margin_f * self.interp)

        # per-pair running baseline subtracted from the cross-correlation
        self.cc_baseline = [0] * len(self.pair)

    def put(self, data):
        """Queue raw interleaved audio bytes for the worker thread."""
        self.queue.put(data)

    def start(self):
        """Start the background processing thread."""
        self.done = False
        thread = threading.Thread(target=self.run)
        thread.daemon = True
        thread.start()

    def stop(self):
        """Turn the pixel ring off and ask the worker thread to exit."""
        pixel_ring.off()
        self.done = True

    def run(self):
        """Worker loop: consume queued audio frame by frame.

        For each full frame: denoise the first channel, run VAD, estimate
        the direction, and periodically point the pixel ring at the voted
        direction. The denoised mono audio is forwarded downstream.
        """
        has_voice = 0
        # bytes buffer (the original used '' which breaks on Python 3)
        buffer = b''
        count = 0
        pixel_ring_countdown = 0

        while not self.done:
            data = self.queue.get()
            buffer += data

            while len(buffer) >= self.frame_bytes:
                data = buffer[:self.frame_bytes]
                buffer = buffer[self.frame_bytes:]

                # frombuffer/tobytes replace deprecated fromstring/tostring
                data = np.frombuffer(data, dtype='int16')
                mono = data[0::self.channels].tobytes()

                mono = self.ap.process_stream(mono)
                has_voice = self.ap.has_voice()

                offset, direction = self._process(data)

                self.collections.append([direction, offset, has_voice])

                count += 1
                if count >= self.collections.maxlen:
                    direction = self.get_direction()
                    # NOTE(review): a direction of 0 degrees is falsy and is
                    # treated like "no direction" here -- confirm intended
                    if direction:
                        print('@ {}'.format(direction))

                        pixel_ring.wakeup(direction)
                        pixel_ring_countdown = 10
                    else:
                        if pixel_ring_countdown > 0:
                            pixel_ring_countdown -= 1
                            if pixel_ring_countdown == 0:
                                pixel_ring.off()

                    count = 0

                super(DOA, self).put(mono)

    def set_callback(self, callback):
        """Register a detection callback; raise if it is not callable."""
        if callable(callback):
            self.on_detected = callback
        else:
            # the original built this exception but never raised it
            raise ValueError('The callback parameter is not callable')

    def get_direction(self):
        """Vote over the recent window; return a direction in degrees.

        Returns the dominant sector's angle (a multiple of 30 degrees) when
        at least half of the window had voice activity and at least a third
        of the frames agree on the sector; otherwise returns None implicitly.
        """
        counting = [0] * 12
        voice = 0
        for d in self.collections:
            if d[2]:
                voice += 1

            counting[d[0]] += 1

        direction_index = np.argmax(counting)
        self.direction = direction_index * 30

        if voice >= self.collections.maxlen / 2 and counting[
                direction_index] >= self.collections.maxlen / 3:
            return self.direction

    def _process(self, data):
        """Estimate per-pair delay offsets and a 12-sector direction.

        Args:
            data: 1-D int16 array holding one interleaved multi-channel
                frame of ``frame_size`` samples per channel.

        Returns:
            (offset, direction): per-pair fractional sample offsets and a
            sector index in [0, 11], each sector spanning 30 degrees.
        """
        X = [0] * self.channels
        for channel in self.mask:
            x = data[channel::self.channels]
            # apply the analysis window before the FFT
            if self.window is not None:
                x = x * self.window

            X[channel] = np.fft.rfft(x, self.nfft)

        offset = [0] * len(self.pair)

        for i, v in enumerate(self.pair):
            CC = X[v[1]] * np.conj(X[v[0]])
            # phase transform: whiten the cross-spectrum magnitude
            CC /= np.abs(CC) + eps
            cc = np.fft.irfft(CC, n=self.nfft * self.interp)

            # keep only lags within the physically possible delay range
            cc = np.concatenate((cc[-self.margin:], cc[:self.margin + 1]))

            cc = np.abs(cc)

            # remove the slowly adapting baseline
            cc = cc - self.cc_baseline[i]

            # find max cross correlation index
            offset_max = np.argmax(cc) - self.margin
            offset[i] = (offset_max) / float(self.interp)

            # update baseline
            self.cc_baseline[i] = self.cc_baseline[i] + 0.01 * cc

        # use the pair with the smallest delay (sound closest to broadside)
        min_index = np.argmin(np.abs(offset[:3]))
        theta = np.arcsin(offset[min_index] / self.margin_f) * 180 / np.pi
        if (min_index != 0
                and offset[min_index - 1] < 0) or (min_index == 0
                                                   and offset[2] >= 0):
            best_guess = (360 - theta) % 360
        else:
            best_guess = (180 + theta)

        best_guess = (best_guess + 30 + min_index * 60) % 360

        direction = int((best_guess + 15) // 30 % 12)

        return offset, direction
if len(sys.argv) < 3:
    print('Usage: {} audio.wav out.wav'.format(sys.argv[0]))
    sys.exit(1)

# open the input WAV and read its stream parameters
wav = wave.open(sys.argv[1], 'rb')
rate = wav.getframerate()
width = wav.getsampwidth()
channels = wav.getnchannels()

# create the output WAV mirroring the input format
out = wave.open(sys.argv[2], 'wb')
out.setnchannels(channels)
out.setsampwidth(width)
out.setframerate(rate)

ap = AP(enable_ns=True)

# set input/output stream format
ap.set_stream_format(rate, channels, rate, channels)

frames_size = int(rate * 10 / 1000)         # only support processing 10 ms audio each time
frames_bytes = frames_size * width * channels




# read 10 ms at a time; a short read marks the end of the file
while True:
    data = wav.readframes(frames_size)
    if len(data) != frames_bytes:
        break
# NOTE(review): this excerpt appears truncated -- the lines that process
# `data` (ap.process_stream) and write the result to `out` are missing.
Example no. 11
0
if len(sys.argv) < 3:
    print('Usage: {} audio.wav out.wav'.format(sys.argv[0]))
    sys.exit(1)

# open the input WAV and read its stream parameters
wav = wave.open(sys.argv[1], 'rb')
rate = wav.getframerate()
width = wav.getsampwidth()
channels = wav.getnchannels()

# create the output WAV mirroring the input format
out = wave.open(sys.argv[2], 'wb')
out.setnchannels(channels)
out.setsampwidth(width)
out.setframerate(rate)

# enable automatic gain control (type 1)
ap = AP(agc_type=1)

# set input/output stream format
ap.set_stream_format(rate, channels, rate, channels)

# WebRTC audio processing handles 10 ms of audio per call
frames_size = int(rate * 10 / 1000)
frames_bytes = frames_size * width * channels

# presumably the AGC target level in dB -- confirm against the AP bindings
ap.set_agc_target(-20)


# read 10 ms at a time; a short read marks the end of the file
while True:
    data = wav.readframes(frames_size)
    if len(data) != frames_bytes:
        break
# NOTE(review): this excerpt appears truncated -- the lines that process
# `data` (ap.process_stream) and write the result to `out` are missing.