Example #1
0
    def test_callback(self):
        """Check that ``tokenize`` hands every detected token to the callback.

        When a callback is supplied, tokens are collected through it instead
        of being read from the return value; two tokens are expected for the
        input below.
        """

        tokens = []

        def callback(data, start, end):
            # Store each delivered token as a (data, start, end) triple.
            tokens.append((data, start, end))

        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=8,
            max_continuous_silence=3,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )

        data_source = StringDataSource("aaAAAAAAAAAAAAa")
        #                                 ^      ^^   ^
        #                                 2      910  14

        tokenizer.tokenize(data_source, callback=callback)

        # The message now reports the count that is actually asserted (2).
        self.assertEqual(
            len(tokens),
            2,
            msg="wrong number of tokens, expected: 2, found: {0} ".format(
                len(tokens)),
        )
def split(filename='g1238-20181214-081712-1544750232.37681.wav'):
    """Split a WAV file into one file per detected audio activity.

    The samples are read with ``wavfile.read`` and plotted, the file is
    tokenized with an energy-based validator, and the samples of every token
    are written to ``token_<i>.wav`` in the current directory. The list of
    token durations is printed before returning.

    Args:
        filename: Path of the WAV file to split.

    Returns:
        A list of ``[start_sample, end_sample]`` pairs, one per token.
    """
    # NOTE(review): 80 samples per analysis window and 8000 samples per second
    # presumably correspond to an 8 kHz file analysed with the default 10 ms
    # window -- confirm before using files with another sampling rate.
    samples_per_window = 80
    samples_per_second = 8000

    sr, samples = wavfile.read(filename=filename, mmap=True)
    plt.plot(samples)
    asource = ADSFactory.ads(filename=filename, record=False)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=50)
    # Lengths are counted in analysis windows. Assuming the default 10 ms
    # window: min_length=100 -> 1 s, max_length=500 -> 5 s,
    # max_continuous_silence=50 -> 0.5 s.
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=100,
                                max_length=500,
                                max_continuous_silence=50)
    stack = []
    durations = []  # was named `sum`, which shadowed the builtin
    asource.open()
    try:
        tokens = tokenizer.tokenize(asource)
        for i, t in enumerate(tokens):
            first = t[1] * samples_per_window
            last = t[2] * samples_per_window
            stack.append([first, last])
            durations.append((last - first) / samples_per_second)
            wavfile.write('token_' + str(i) + '.wav', sr,
                          samples[first:last])  # write to file
    finally:
        # Release the source even when tokenizing or writing fails.
        asource.close()
    print(durations)
    return stack
Example #3
0
def make_auditok_detector(sample_rate=100):
    """Build a detector that marks which analysis windows contain activity.

    Args:
        sample_rate: Number of analysis windows per second.

    Returns:
        A function taking a raw audio buffer and returning a boolean NumPy
        array with one entry per analysis window, True where a token
        produced by the tokenizer covers that window.
    """
    sample_width = 2
    window_frames = FRAME_RATE // sample_rate
    energy_validator = AudioEnergyValidator(sample_width=sample_width,
                                            energy_threshold=50)
    tokenizer = StreamTokenizer(
        validator=energy_validator,
        min_length=0.2 * sample_rate,
        max_length=int(5 * sample_rate),
        max_continuous_silence=0.25 * sample_rate,
    )

    def _detect(asegment):
        buffer_source = BufferAudioSource(
            data_buffer=asegment,
            sampling_rate=FRAME_RATE,
            sample_width=sample_width,
            channels=1,
        )
        ads = ADSFactory.ads(audio_source=buffer_source,
                             block_dur=1. / sample_rate)
        ads.open()
        tokens = tokenizer.tokenize(ads)

        # Number of windows needed to cover the buffer (ceiling division).
        n_frames = len(asegment) // sample_width
        n_windows = (n_frames + window_frames - 1) // window_frames

        # Difference array: +1 where a token starts, -1 just past its end;
        # the running sum is positive exactly inside tokens.
        deltas = np.zeros(n_windows + 1, dtype=int)
        for token in tokens:
            first = token[1]
            last = token[2]
            deltas[first] += 1
            deltas[last + 1] -= 1
        coverage = np.cumsum(deltas)[:-1]
        return coverage > 0

    return _detect
    def transcribe_audio(self, stereo_path, channels_to_process):
        """Tokenize the requested channel(s) of an audio file and batch them.

        Args:
            stereo_path: Path of the audio file; must exist.
            channels_to_process: 'A', 'B' or 'AB', selecting which leg(s)
                returned by ``split_to_mono`` are tokenized.

        Raises:
            Exception: If ``stereo_path`` is not an existing file.
        """
        if not os.path.isfile(stereo_path):
            raise Exception("Audio file does not exist.")

        data = self.split_to_mono(stereo_path)

        # Pull both legs out of the dict and clear the entries there.
        legs = (("A", data['a_leg']), ("B", data['b_leg']))
        data['a_leg'] = None
        data['b_leg'] = None

        validator = AudioEnergyValidator(sample_width=data['frame_width'], energy_threshold=45)
        trimmer = StreamTokenizer(
            validator,
            min_length=self.min_segment_length,
            max_length=self.max_segment_length,
            max_continuous_silence=self.max_continuous_silence,
            mode=StreamTokenizer.DROP_TAILING_SILENCE,
        )

        segments = []

        def collector(label):
            # Build a tokenizer callback that tags each token with its leg.
            def collect(chunk, start, end):
                segments.append((label, chunk, start, end))
            return collect

        for label, leg in legs:
            if channels_to_process not in (label, 'AB'):
                continue
            source = ADSFactory.ads(audio_source=leg, record=True, block_size=data['frame_rate'] / self.divisor)
            source.open()
            trimmer.tokenize(source, callback=collector(label))

        # Order the tokens of all processed legs by their end position.
        segments.sort(key=lambda segment: segment[3])
        self.batch(segments, data['duration'], data['frame_rate'], data['frame_width'], data['nchannels'])
Example #5
0
def getSplitAudioDurationListBetweenSilence(fileName,eachAudioLen,silencePeriod,energyThreshold=55):
    """Tokenize an audio file into regions of activity separated by silence.

    Errors are handled best-effort: any exception is written to stderr and a
    keyboard interrupt is swallowed; in both cases whatever tokens were
    obtained so far (possibly none) are returned.

    Args:
        fileName: Path of the audio file to analyse.
        eachAudioLen: Maximum token length; multiplied by 100 to get analysis
            windows (presumably seconds, assuming the default 10 ms window --
            confirm).
        silencePeriod: Maximum tolerated silence inside a token, scaled the
            same way as ``eachAudioLen``.
        energyThreshold: Energy threshold passed to the validator.

    Returns:
        The list of tokens returned by the tokenizer, or an empty list when
        tokenization did not complete.
    """
    # Pre-bind both names so the error paths below never hit an unbound local.
    tokens = []
    asource = None
    try:
        # The source is not recorded (record=False), so it cannot be rewound.
        asource = ADSFactory.ads(filename=fileName, record=False)

        validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=energyThreshold)

        # Lengths are counted in analysis windows; min_length=400 windows is
        # 4 s assuming the default 10 ms window.
        tokenizer = StreamTokenizer(validator=validator, min_length=400, max_length=eachAudioLen*100, max_continuous_silence=silencePeriod*100)

        asource.open()
        tokens = tokenizer.tokenize(asource)
    except KeyboardInterrupt:
        # Deliberately swallowed: the caller still gets the tokens found so far.
        pass
    except Exception as e:
        sys.stderr.write(str(e) + "\n")
    finally:
        if asource is not None:
            asource.close()
    return tokens
Example #6
0
def _make_auditok_detector(
    sample_rate: int, frame_rate: int, non_speech_label: float
) -> Callable[[bytes], np.ndarray]:
    """Build an auditok-based detector producing one label per analysis window.

    auditok is imported lazily; if the import fails an explanatory message is
    logged and the ImportError is re-raised.

    Args:
        sample_rate: Number of analysis windows per second.
        frame_rate: Audio frames per second of the buffers to analyse.
        non_speech_label: Value used when closing a token in the difference
            array (see ``_detect``).

    Returns:
        A function mapping a raw audio buffer to a float array clipped to
        the range [0, 1], with one entry per analysis window.
    """
    try:
        from auditok import (
            BufferAudioSource,
            ADSFactory,
            AudioEnergyValidator,
            StreamTokenizer,
        )
    except ImportError as e:
        logger.error(
            """Error: auditok not installed!
        Consider installing it with `pip install auditok`. Note that auditok
        is GPLv3 licensed, which means that successfully importing it at
        runtime creates a derivative work that is GPLv3 licensed. For personal
        use this is fine, but note that any commercial use that relies on
        auditok must be open source as per the GPLv3!*
        *Not legal advice. Consult with a lawyer.
        """
        )
        raise e

    sample_width = 2
    window_frames = frame_rate // sample_rate
    energy_validator = AudioEnergyValidator(
        sample_width=sample_width, energy_threshold=50
    )
    tokenizer = StreamTokenizer(
        validator=energy_validator,
        min_length=0.2 * sample_rate,
        max_length=int(5 * sample_rate),
        max_continuous_silence=0.25 * sample_rate,
    )

    def _detect(asegment: bytes) -> np.ndarray:
        buffer_source = BufferAudioSource(
            data_buffer=asegment,
            sampling_rate=frame_rate,
            sample_width=sample_width,
            channels=1,
        )
        ads = ADSFactory.ads(audio_source=buffer_source, block_dur=1.0 / sample_rate)
        ads.open()
        tokens = tokenizer.tokenize(ads)

        # Number of windows needed to cover the buffer (ceiling division).
        n_frames = len(asegment) // sample_width
        n_windows = (n_frames + window_frames - 1) // window_frames

        # Difference array: 1.0 at each token start and a closing value just
        # past each token end; its running sum is clipped to [0, 1] below.
        deltas = np.zeros(n_windows + 1)
        closing_value = non_speech_label - 1.0
        for token in tokens:
            first = token[1]
            last = token[2]
            deltas[first] = 1.0
            deltas[last + 1] = closing_value
        running = np.cumsum(deltas)[:-1]
        return np.clip(running, 0.0, 1.0)

    return _detect
Example #7
0
def read_split_dir(file):
    """Split a WAV file longer than four seconds into per-activity clips.

    Files of four seconds or less are reported as untouched. Longer files are
    converted to a temporary mono copy, tokenized with an energy validator,
    and every token is exported from the original audio as
    ``<name>_clip<index>.wav``. The temporary copy is always removed; the
    original file is removed only when at least one clip was written, so a
    file in which no activity is detected is not lost.

    Args:
        file: Path of the WAV file to process.
    """
    # The context manager releases the file handle as soon as the length is known.
    with sf.SoundFile(file) as f:
        # duration of file in seconds
        duration = len(f) / f.samplerate

    if duration <= 4:
        print(file, 'untouched')
        return

    # Original filename without extension, used to name the clips.
    name = os.path.splitext(file)[0]

    # Decode the original once and reuse it for every clip.
    original = AudioSegment.from_wav(file)

    # Prefix only the basename so a path with a directory part stays valid.
    tmpfile = os.path.join(os.path.dirname(file),
                           '0wavtmp_' + os.path.basename(file))
    original.set_channels(1).export(tmpfile, format="wav")

    # Analysis window passed explicitly so token indices can be converted to
    # the millisecond offsets pydub slicing expects.
    window_seconds = 0.01
    window_ms = int(round(window_seconds * 1000))

    clips_written = 0
    try:
        asource = ADSFactory.ads(filename=tmpfile, record=True,
                                 block_dur=window_seconds)
        validator = AudioEnergyValidator(
            sample_width=asource.get_sample_width(), energy_threshold=50)

        # Lengths are counted in analysis windows of 10 ms:
        # min_length=500 -> 5 s, max_length=4000 -> 40 s,
        # max_continuous_silence=100 -> 1 s.
        tokenizer = StreamTokenizer(validator=validator,
                                    min_length=500,
                                    max_length=4000,
                                    max_continuous_silence=100)

        asource.open()
        try:
            tokens = tokenizer.tokenize(asource)
        finally:
            asource.close()

        for index, t in enumerate(tokens):
            # Token bounds are window indices with an inclusive end; pydub
            # slices by milliseconds with an exclusive end.
            begin_ms = t[1] * window_ms
            end_ms = (t[2] + 1) * window_ms
            clip = original[begin_ms:end_ms]

            chunk_name = "{}_clip{}.wav".format(name, index)
            print("Generating", chunk_name)
            clip.export(chunk_name, format="wav")
            clips_written += 1
    finally:
        # Remove the temporary file even when tokenizing or exporting fails.
        os.remove(tmpfile)

    if clips_written:
        # Remove the original file to avoid confusion with its clips.
        os.remove(file)
    def __init__(self):
        """Create the audio source, tokenizer and player, then load the CNN.

        The model architecture and weights are read through ``self.load_cnn``
        from fixed relative paths and the model is compiled before use.
        """
        # Recording source capped by max_time=4 (presumably seconds).
        source = ADSFactory.ads(record=True, max_time=4)
        self.asource = source

        energy_validator = AudioEnergyValidator(sample_width=2,
                                                energy_threshold=50)
        self.validator = energy_validator

        self.tokenizer = StreamTokenizer(
            validator=energy_validator,
            min_length=20,
            max_length=1000,
            max_continuous_silence=30,
        )
        self.player = player_for(source)

        cnn = self.load_cnn(
            '../model/final_cnn_model.json',
            '../model/weights_final_cnn.h5',
        )
        self.model = cnn
        cnn.compile(
            loss='categorical_crossentropy',
            optimizer='sgd',
            metrics=['accuracy'],
        )
Example #9
0
    def test_STRICT_MIN_LENGTH_and_DROP_TAILING_SILENCE(self):
        """Combine STRICT_MIN_LENGTH with DROP_TRAILING_SILENCE.

        A single token of eight 'A' frames, spanning frames 2 to 9, is
        expected for the input below.
        """
        combined_mode = (StreamTokenizer.STRICT_MIN_LENGTH
                         | StreamTokenizer.DROP_TRAILING_SILENCE)
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=8,
            max_continuous_silence=3,
            init_min=3,
            init_max_silence=3,
            mode=combined_mode,
        )

        # Frames 0-1 are silence, the expected token covers frames 2..9.
        data_source = StringDataSource("aaAAAAAAAAAAAAaa")

        tokens = tokenizer.tokenize(data_source)

        count = len(tokens)
        self.assertEqual(
            count,
            1,
            msg="wrong number of tokens, expected: 1, found: {0} ".format(
                count),
        )

        token = tokens[0]
        checks = (
            ("".join(token[0]), "AAAAAAAA",
             "wrong data for token 1, expected: 'AAAAAAAA', found: '{0}' "),
            (token[1], 2,
             "wrong start frame for token 1, expected: 2, found: {0} "),
            (token[2], 9,
             "wrong end frame for token 1, expected: 9, found: {0} "),
        )
        # Checked in the order data, start frame, end frame.
        for found, wanted, template in checks:
            self.assertEqual(found, wanted, msg=template.format(found))
Example #10
0
    def calibrate(self):
        '''
        Calibrate the MinMaxScaler ``self.scaler`` from captured audio.

        Audio is read from ``self.audioPath`` when it is set, otherwise from
        the default input of ``ADSFactory.ads``, limited by ``max_time=10``.
        The scaler is fitted on every detected token and the resulting
        minimum and maximum are stored in ``self.mini`` and ``self.maxi``.
        See sklearn.preprocessing.MinMaxScaler for details.

        This is redundant, scaling is not necessary.

        NOTE(review): ``raw_input`` means this method targets Python 2; the
        print calls below use the single-argument function form, which
        behaves the same on Python 2 and 3.
        '''
        # Only used to wait for the user; the typed text is ignored.
        raw_input(
            "Calibrate normalisation, press return then make noises from your mouth hole."
        )
        if self.audioPath is None:
            asource = ADSFactory.ads(sampling_rate=self.sr, max_time=10)
        else:
            asource = ADSFactory.ads(filename=self.audioPath,
                                     sampling_rate=self.sr,
                                     max_time=10)

        validator = AudioEnergyValidator(
            sample_width=asource.get_sample_width(),
            energy_threshold=self.energy)

        tokenizer = StreamTokenizer(validator=validator,
                                    min_length=self.min_len,
                                    max_length=self.max_len,
                                    max_continuous_silence=self.max_con_si)

        def calib_callback(data, start, end):
            # np.frombuffer replaces the deprecated binary mode of
            # np.fromstring; only the first chunk of the token is used.
            # NOTE(review): each call refits the scaler from scratch, so the
            # final range reflects the last token only -- confirm intent.
            audio = np.frombuffer(data[0], dtype=np.int8)
            self.scaler.fit_transform(np.swapaxes(np.asarray([audio]), 0, 1))
            print("Audio sample found {0}--{1}".format(start, end))

        asource.open()
        try:
            tokenizer.tokenize(asource, callback=calib_callback)
        finally:
            # Release the audio source even if tokenizing fails.
            asource.close()
        print("Scaler paramaters found: min: {0} max: {1}".format(
            self.scaler.data_min_, self.scaler.data_max_))

        print("calibration done")
        self.mini = self.scaler.data_min_
        self.maxi = self.scaler.data_max_
Example #11
0
    def test_min_length_1_init_max_length_1(self):
        """With min and max length of 1 and no tolerated silence, 21 tokens
        are expected for the input below."""
        settings = dict(
            min_length=1,
            max_length=1,
            max_continuous_silence=0,
            init_min=0,
            init_max_silence=0,
            mode=0,
        )
        tokenizer = StreamTokenizer(self.A_validator, **settings)

        data_source = StringDataSource(
            "AAaaaAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaAAAAA")

        tokens = tokenizer.tokenize(data_source)

        count = len(tokens)
        self.assertEqual(
            count,
            21,
            msg="wrong number of tokens, expected: 21, found: {0} ".format(
                count))
Example #12
0
    def test_DROP_TAILING_SILENCE(self):
        """In DROP_TAILING_SILENCE mode a single token of five 'A' frames,
        spanning frames 2 to 6, is expected for the input below."""
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=10,
            max_continuous_silence=2,
            init_min=3,
            init_max_silence=3,
            mode=StreamTokenizer.DROP_TAILING_SILENCE,
        )

        # Two silent frames, five 'A' frames (2..6), then silence.
        data_source = StringDataSource("aaAAAAAaaaaa")

        tokens = tokenizer.tokenize(data_source)

        count = len(tokens)
        self.assertEqual(
            count,
            1,
            msg="wrong number of tokens, expected: 1, found: {0} ".format(
                count))

        token = tokens[0]
        checks = (
            (''.join(token[0]), "AAAAA",
             "wrong data for token 1, expected: 'AAAAA', found: '{0}' "),
            (token[1], 2,
             "wrong start frame for token 1, expected: 2, found: {0} "),
            (token[2], 6,
             "wrong end frame for token 1, expected: 6, found: {0} "),
        )
        # Checked in the order data, start frame, end frame.
        for found, wanted, template in checks:
            self.assertEqual(found, wanted, msg=template.format(found))
    def __init__(self, _useGui):
        """Set up audio capture, the recordings folder and the optional GUI.

        Args:
            _useGui: When truthy, a Tk root and a ``GUI`` display are created
                and the display image is shown.

        Raises:
            OSError: If the recordings folder cannot be created for a reason
                other than already existing.
        """
        # Audio parameters.
        max_length = 1000000
        max_continuous_silence = 500
        min_length = 150

        self.sample_rate = 48000
        self.asource = ADSFactory.ads(record=True,
                                      max_time=max_length,
                                      sampling_rate=self.sample_rate)

        self.sample_width = self.asource.get_sample_width()
        self.channels = self.asource.get_channels()
        # START VALIDATOR
        # NOTE(review): `energy_threshold` is not defined in this method; it
        # is presumably a module-level name -- confirm.
        self.validator = AudioEnergyValidator(
            sample_width=self.sample_width, energy_threshold=energy_threshold)
        self.tokenizer = StreamTokenizer(
            validator=self.validator,
            min_length=min_length,
            max_length=max_length,
            max_continuous_silence=max_continuous_silence)

        # One timestamped folder per run.
        self.audio_folder = 'recordings/' + '{:%Y-%m-%d_%H-%M-%S}'.format(
            datetime.datetime.now()) + '/'
        if not os.path.exists(os.path.dirname(self.audio_folder)):
            try:
                os.makedirs(os.path.dirname(self.audio_folder))
            except OSError as exc:  # Guard against race condition
                if exc.errno != errno.EEXIST:
                    # Re-raise the original error (was the undefined name
                    # `raiseRec`, which raised NameError instead).
                    raise
        os.chmod('recordings', 0o777)
        os.chmod(self.audio_folder, 0o777)
        self.MODE = 'ECHO'

        self.useGui = _useGui

        if self.useGui:
            root = Tk()
            self.display = GUI(root, True)
            self.display.display_image()
Example #14
0
    def configure(self, rf):
        """Open the YARP ports and set up hotword detection, the audio
        tokenizer and the speech recognizer.

        Args:
            rf: Not used by this method.

        Returns:
            True once configuration has completed.
        """
        # Setting up rpc port
        self.portsList["rpc"] = yarp.Port()
        self.portsList["rpc"].open("/sentence_tokenizer/rpc:i")
        self.attach(self.portsList["rpc"])

        self.portsList["audio_out"] = yarp.BufferedPortBottle()
        self.portsList["audio_out"].open("/sentence_tokenizer/audio:o")

        # Setting up hotword detection
        self.hotword_detector = snowboydecoder.HotwordDetector(self.hotword_model, sensitivity=self.hotword_sensitivity)

        # Setting up audio tokenizer to split sentences
        self.audio_source = ADSFactory.ads(record=True, max_time=self.tok_record_duration, block_dur=self.tok_window)
        self.tok_validator = AudioEnergyValidator(sample_width=self.audio_source.get_sample_width(),
                                                  energy_threshold=self.tok_energy_threshold)
        self.tokenizer_mode = StreamTokenizer.DROP_TRAILING_SILENCE
        self.tokenizer = StreamTokenizer(validator=self.tok_validator,
                                         min_length=self.tok_min_len,
                                         max_length=self.tok_max_len,
                                         max_continuous_silence=self.tok_max_silence_duration,
                                         mode=self.tokenizer_mode)

        if self.echo_enabled:
            self.echo_thread = threading.Thread(target=self.replayAudio)
            self.echo_thread.start()

        if self.hotword_enabled:
            print("Waiting for hotword to start interaction")
            # NOTE(review): the blocking hotword wait is disabled, so the
            # "detected" message below is printed immediately.
            # self.hotword_detector.start(detected_callback=self.detected_callback,
            #                             interrupt_check=self.interrupt_callback,
            #                             sleep_time=self.hotword_loop_time)
            print("Hotword detected. Starting tokenizer thread")
        else:
            # Function-call form, consistent with the prints above; the
            # statement form was a syntax error on Python 3.
            print("Starting tokenizer thread")

        self.asr = sr.Recognizer()

        with open('google_credentials.json', 'r') as credentials:
            self.google_credentials = credentials.read()
        return True
Example #15
0
    def test_min_5_max_10_max_continuous_silence_1(self):
        """Tokenize with min_length=5, max_length=10 and one tolerated
        silent frame.

        Three tokens are expected. Each token's data, start frame and end
        frame are compared with the table below, and every failure message
        is generated from that table so it always names the token and the
        value actually being asserted.
        """
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=10,
            max_continuous_silence=1,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )

        data_source = StringDataSource("aaaAAAAAaAAAAAAaaAAAAAAAAAa")

        # Expected tokens as (data, start frame, end frame); the data lengths
        # show that both frame bounds are inclusive.
        expected = [
            ("AAAAAaAAAA", 3, 12),
            ("AAa", 13, 15),
            ("AAAAAAAAAa", 17, 26),
        ]

        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            len(expected),
            msg="wrong number of tokens, expected: {0}, found: {1} ".format(
                len(expected), len(tokens)),
        )

        for number, (token, wanted) in enumerate(zip(tokens, expected), 1):
            exp_data, exp_start, exp_end = wanted
            data = "".join(token[0])
            start = token[1]
            end = token[2]
            self.assertEqual(
                data,
                exp_data,
                msg=("wrong data for token {0}, expected: '{1}', "
                     "found: '{2}' ").format(number, exp_data, data),
            )
            self.assertEqual(
                start,
                exp_start,
                msg=("wrong start frame for token {0}, expected: {1}, "
                     "found: {2} ").format(number, exp_start, start),
            )
            self.assertEqual(
                end,
                exp_end,
                msg=("wrong end frame for token {0}, expected: {1}, "
                     "found: {2} ").format(number, exp_end, end),
            )
Example #16
0
    def test_min_length_4_init_max_length_5(self):
        """Tokenize with min_length=4 and max_length=5.

        Four tokens are expected. Each token's data, start frame and end
        frame are compared with the table below, and every failure message
        is generated from that table so it always names the token and the
        value actually being asserted.
        """
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=4,
            max_length=5,
            max_continuous_silence=4,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )

        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaAAAAAAAAaaaaaaAAAAAaaaaaAAaaAaa")

        # Expected tokens as (data, start frame, end frame); the data lengths
        # show that both frame bounds are inclusive.
        expected = [
            ("AAAAA", 18, 22),
            ("AAAaa", 23, 27),
            ("AAAAA", 32, 36),
            ("AAaaA", 42, 46),
        ]

        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            len(expected),
            msg="wrong number of tokens, expected: {0}, found: {1} ".format(
                len(expected), len(tokens)),
        )

        for number, (token, wanted) in enumerate(zip(tokens, expected), 1):
            exp_data, exp_start, exp_end = wanted
            data = "".join(token[0])
            start = token[1]
            end = token[2]
            self.assertEqual(
                data,
                exp_data,
                msg=("wrong data for token {0}, expected: '{1}', "
                     "found: '{2}' ").format(number, exp_data, data),
            )
            self.assertEqual(
                start,
                exp_start,
                msg=("wrong start frame for token {0}, expected: {1}, "
                     "found: {2} ").format(number, exp_start, start),
            )
            self.assertEqual(
                end,
                exp_end,
                msg=("wrong end frame for token {0}, expected: {1}, "
                     "found: {2} ").format(number, exp_end, end),
            )
Example #17
0
    def test_min_length_10_init_max_length_20(self):
        """Tokenize with min_length=10 and max_length=20; two tokens are
        expected, spanning frames 1..16 and 30..43."""
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=10,
            max_length=20,
            max_continuous_silence=4,
            init_min=3,
            init_max_silence=3,
            mode=0,
        )

        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaaAAAAAaaaaaaAAAAAaaAAaaAAA")

        # Expected tokens as (data, start frame, end frame).
        expected = (
            ("AaaaAaAaaAaAaaaa", 1, 16),
            ("AAAAAaaAAaaAAA", 30, 43),
        )

        tokens = tokenizer.tokenize(data_source)

        count = len(tokens)
        self.assertEqual(
            count,
            2,
            msg="wrong number of tokens, expected: 2, found: {0} ".format(
                count),
        )
        found_tokens = (tokens[0], tokens[1])

        # Per token the checks run in the order data, start frame, end frame.
        for number, (token, wanted) in enumerate(
                zip(found_tokens, expected), 1):
            exp_data, exp_start, exp_end = wanted
            data = "".join(token[0])
            start = token[1]
            end = token[2]
            self.assertEqual(
                data,
                exp_data,
                msg="wrong data for token {0}, expected: '{1}', found: '{2}' "
                .format(number, exp_data, data),
            )
            self.assertEqual(
                start,
                exp_start,
                msg="wrong start frame for token {0}, expected: {1}, found: {2} "
                .format(number, exp_start, start),
            )
            self.assertEqual(
                end,
                exp_end,
                msg="wrong end frame for token {0}, expected: {1}, found: {2} "
                .format(number, exp_end, end),
            )
Example #18
0
    def test_init_min_0_init_max_silence_0(self):
        """Tokenize with init_min=0 and init_max_silence=0.

        Two tokens are expected. Each token's data, start frame and end
        frame are compared with the table below, and every failure message
        is generated from that table so it always names the token and the
        value actually being asserted.
        """
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=20,
            max_continuous_silence=4,
            init_min=0,
            init_max_silence=0,
            mode=0,
        )

        data_source = StringDataSource("aAaaaAaAaaAaAaaaaaaaAAAAAAAA")

        # Expected tokens as (data, start frame, end frame); a token is
        # (data, start frame (included), end frame (included)).
        expected = [
            ("AaaaAaAaaAaAaaaa", 1, 16),
            ("AAAAAAAA", 20, 27),
        ]

        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            len(expected),
            msg="wrong number of tokens, expected: {0}, found: {1} ".format(
                len(expected), len(tokens)),
        )

        for number, (token, wanted) in enumerate(zip(tokens, expected), 1):
            exp_data, exp_start, exp_end = wanted
            data = "".join(token[0])
            start = token[1]
            end = token[2]
            self.assertEqual(
                data,
                exp_data,
                msg=("wrong data for token {0}, expected: '{1}', "
                     "found: '{2}' ").format(number, exp_data, data),
            )
            self.assertEqual(
                start,
                exp_start,
                msg=("wrong start frame for token {0}, expected: {1}, "
                     "found: {2} ").format(number, exp_start, start),
            )
            self.assertEqual(
                end,
                exp_end,
                msg=("wrong end frame for token {0}, expected: {1}, "
                     "found: {2} ").format(number, exp_end, end),
            )
Example #19
0
    # Join the collected chunks into one bytes object holding the whole signal.
    original_signal = b''.join(original_signal)

    # Rewind the source so it can be read again by the tokenizer below.
    asource.rewind()

    # Create a validator with an energy threshold of 50
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=50)

    # Create a tokenizer whose maximum token length and maximum continuous
    # silence are set so high that they are effectively unlimited.
    # Note the DROP_TRAILING_SILENCE mode that will ensure removing trailing silence
    trimmer = StreamTokenizer(validator,
                              min_length=20,
                              max_length=99999999,
                              max_continuous_silence=9999999,
                              mode=StreamTokenizer.DROP_TRAILING_SILENCE,
                              init_min=3,
                              init_max_silence=1)

    tokens = trimmer.tokenize(asource)

    # Make sure we only have one token
    # NOTE(review): this check is skipped when Python runs with -O.
    assert len(tokens) == 1, "Should have detected one single token"

    # tokens[0][0] holds the data chunks of the single token.
    trimmed_signal = b''.join(tokens[0][0])

    player = player_for(asource)

    print(
        "\n ** Playing original signal (with leading and trailing silence)...")
Example #20
0
   # Set up the recording audio source.
   asource = ADSFactory.ads(record=True, max_time = duration, sampling_rate = sample_rate)

   # On macOS ('Darwin') and Windows, replace sample_rate with the rate the
   # source reports; on any other system sample_rate keeps its current value.
   _os = platform.system()
   if (_os == 'Darwin') or (_os == 'Windows'): # macOS or Windows
      sample_rate = asource.get_sampling_rate()

   # Get sample width and channel count from the audio source.
   sample_width = asource.get_sample_width()
   channels = asource.get_channels()

   # START VALIDATOR
   validator = AudioEnergyValidator(sample_width=sample_width, energy_threshold = energy_threshold)
   # NOTE(review): max_length is counted in analysis windows like min_length;
   # RECORD_SECONDS is presumably in seconds -- confirm the unit matches.
   tokenizer = StreamTokenizer(validator=validator, min_length=80, max_length=RECORD_SECONDS, max_continuous_silence=300)

   # LOAD PYAUDIO
   p = pyaudio.PyAudio()

   # Create the memory helper object (Memoria class).
   _memoria = memoria.Memoria()

   if TRANSCRIPTION:
      # LOAD RECOGNIZER
      recognizer = sr.Recognizer("pt-BR")
      # NLTK resources for Portuguese: stop words and the RSLP stemmer.
      stop_words = nltk.corpus.stopwords.words('portuguese')
      stemmer = nltk.stem.RSLPStemmer()

   # print out sound devices
Example #21
0
def find_voice_segments(audio_file, music_time_list):
    """Detect audio-activity segments in ``audio_file``, cutting out music spans.

    The file is read twice. The first pass measures the mean-square energy of
    every frame and derives an adaptive threshold on a ``10 * log10`` scale,
    placed 40 % of the way from the mean energy up to the peak energy. The
    second pass tokenizes the file with an ``AudioEnergyValidator`` using
    that threshold.

    Args:
        audio_file: Path of the audio file to analyse.
        music_time_list: Mutable list of ``[begin, end]`` music spans.
            Presumably expressed in the same unit as the returned segments
            (verify against the caller). Every span that is consumed is
            REMOVED from this list, i.e. the caller's list is mutated.

    Returns:
        A list of ``[begin, end]`` pairs. Token frame indices are multiplied
        by 10 (presumably converting 10 ms analysis windows to milliseconds,
        TODO confirm). Returns an empty list when the file holds no frames
        or only zero-energy frames.
    """
    formats = {1: numpy.int8, 2: numpy.int16, 4: numpy.int32}

    # ---- Pass 1: per-frame energy ------------------------------------
    # Energies are collected in a plain list. The previous implementation
    # seeded the accumulator with numpy.empty([]), a 0-d *uninitialised*
    # array, so one garbage value leaked into the mean.
    energies = []
    test_source = ADSFactory.ads(filename=audio_file, record=False)
    test_source.open()
    try:
        sample_dtype = formats[test_source.get_sample_width()]
        while True:
            frame = test_source.read()
            if frame is None:
                break
            signal = numpy.frombuffer(frame, dtype=sample_dtype).astype(
                numpy.float64)
            energies.append(float(numpy.dot(signal, signal)) / len(signal))
    finally:
        # Release the file even if decoding a frame fails.
        test_source.close()

    # Nothing to detect in an empty or completely silent file; bailing out
    # here also avoids taking log10(0) below.
    if not energies or max(energies) <= 0.0:
        return []

    log_max = 10. * numpy.log10(max(energies))
    log_mean = 10. * numpy.log10(numpy.mean(energies))
    threshold = log_mean + 0.4 * (log_max - log_mean)

    # ---- Pass 2: tokenize with the adaptive threshold ----------------
    asource = ADSFactory.ads(filename=audio_file, record=False)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=threshold)
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=300,
                                max_length=99999999,
                                max_continuous_silence=300)
    asource.open()
    try:
        tokens = tokenizer.tokenize(asource)
    finally:
        asource.close()

    # ---- Split detected tokens around the known music spans ----------
    segments = []
    for token in tokens:
        segment_begin = token[1] * 10
        segment_end = token[2] * 10

        # Iterate over a snapshot: removing from the list that is being
        # iterated makes the iterator skip the element after each removal.
        for item in list(music_time_list):
            # The music span starts before this segment ends.
            if segment_end > item[0]:
                # Keep the part of the segment that precedes the music...
                segments.append([segment_begin, item[0]])
                # ...and resume the segment right after the music.
                segment_begin = item[1]
                # A music span is consumed once and not reused for later
                # tokens.
                music_time_list.remove(item)

        segments.append([segment_begin, segment_end])

    return segments
Example #22
0
    def extractEvents(path, patientID):
        """Split the recording at ``path`` into one WAV file per detected event.

        The input is re-written to a scratch file, tokenized with an
        energy-based validator, and every detected token is saved as
        ``<dest_path><basename><index>.wav``.

        Args:
            path: Path of the source WAV file.
            patientID: String used as a sub-directory name under
                ``/home/pi/recordings/``.

        Returns:
            The destination directory (with trailing slash) that received the
            per-event files. It is created if it does not exist.
        """
        yname = os.path.basename(path)
        # Drop the last four characters (presumably the ".wav" suffix).
        yname = yname[:len(yname) - 4]

        dest_path = '/home/pi/recordings/' + patientID + '/' + yname + '/'
        if not os.path.exists(dest_path):
            os.makedirs(dest_path)

        _, y = wavfile.read(path)  # read audio file

        # Multi-channel data arrives as a 2-D (frames, channels) array; mono
        # data is 1-D. Check the shape explicitly instead of relying on a bare
        # ``except`` around a failed tuple unpacking.
        if np.ndim(y) == 2 and np.shape(y)[1] > 1:
            # Removes channel index 1 only, as before.
            y = np.delete(y, 1, axis=1)

        # NOTE(review): the scratch copy is always tagged as 44100 Hz, not
        # with the rate read from ``path`` - confirm recordings are 44.1 kHz.
        wavfile.write('/home/pi/coughanalysis_ann/sample.wav',
                      data=y,
                      rate=44100)

        asource = ADSFactory.ads(
            filename='/home/pi/coughanalysis_ann/sample.wav', record=True)

        validator = AudioEnergyValidator(
            sample_width=asource.get_sample_width(), energy_threshold=65)

        # Lengths below are counted in analysis windows. Assuming the default
        # 10 ms window (block size / sampling rate - TODO confirm):
        # min_length=10             : shortest accepted event is 100 ms
        # max_length=1000           : longest accepted event is 10 s
        # max_continuous_silence=40 : up to 400 ms of silence is tolerated
        #                             inside one event
        tokenizer = StreamTokenizer(validator=validator,
                                    min_length=10,
                                    max_length=1000,
                                    max_continuous_silence=40)

        asource.open()
        try:
            tokens = tokenizer.tokenize(asource)

            for count, t in enumerate(tokens):
                # Frames are byte strings; b''.join works on Python 2 and 3
                # whereas ''.join raises TypeError on Python 3.
                data = b''.join(t[0])

                fp = wave.open(dest_path + yname + str(count) + '.wav', "w")
                try:
                    fp.setnchannels(asource.get_channels())
                    fp.setsampwidth(asource.get_sample_width())
                    fp.setframerate(asource.get_sampling_rate())
                    fp.writeframes(data)
                finally:
                    # Always finalise the WAV header, even if writing fails.
                    fp.close()
        finally:
            asource.close()

        return dest_path
Example #23
0
    def test_init_min_3_init_max_silence_0(self):
        """Tokenize with ``init_min=3`` and ``init_max_silence=0``.

        Two tokens are expected, covering frames 18-30 and 33-37 (inclusive)
        of the input string.
        """
        tokenizer = StreamTokenizer(self.A_validator,
                                    min_length=5,
                                    max_length=20,
                                    max_continuous_silence=4,
                                    init_min=3,
                                    init_max_silence=0,
                                    mode=0)

        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaAAAAAAAAAaaaaaaAAAAA")

        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            2,
            msg="wrong number of tokens, expected: 2, found: {0} ".format(
                len(tokens)))

        # (data, start frame, end frame) for each expected token, in order.
        # Driving the checks from one table keeps every failure message tied
        # to the right token number.
        expected = (
            ("AAAAAAAAAaaaa", 18, 30),
            ("AAAAA", 33, 37),
        )
        for number, (token, (exp_data, exp_start, exp_end)) in enumerate(
                zip(tokens, expected), 1):
            data = ''.join(token[0])
            start = token[1]
            end = token[2]
            self.assertEqual(
                data,
                exp_data,
                msg="wrong data for token {0}, expected: '{1}', found: '{2}' ".
                format(number, exp_data, data))
            self.assertEqual(
                start,
                exp_start,
                msg="wrong start frame for token {0}, expected: {1}, "
                "found: {2} ".format(number, exp_start, start))
            self.assertEqual(
                end,
                exp_end,
                msg="wrong end frame for token {0}, expected: {1}, "
                "found: {2} ".format(number, exp_end, end))
Example #24
0
    def runAuditok(self):
        '''
        Capture audio, detect acoustic activity and forward every detection.

        The audio source is built from self.audioPath; when self.audioPath is
        None no filename is passed to ADSFactory.ads (per the original
        description, the built-in microphone is then used).

        For every token reported by the tokenizer that differs from the
        previously reported one, the callback sends a trigger, optionally
        saves the audio (self.record), forwards the reshaped samples with a
        (start, end, chunk_count) stamp and optionally plays them back
        (self.PLAYBACK).

        This method does not return: it calls sys.exit(0) once tokenizing
        finishes.
        '''

        if self.audioPath is None:
            self.asource = ADSFactory.ads(sampling_rate=self.sr)
        else:
            self.asource = ADSFactory.ads(filename=self.audioPath,
                                          sampling_rate=self.sr)

        self.validator = AudioEnergyValidator(
            sample_width=self.asource.get_sample_width(),
            energy_threshold=self.energy)

        self.tokenizer = StreamTokenizer(
            validator=self.validator,
            min_length=self.min_len,
            max_length=self.max_len,
            max_continuous_silence=self.max_con_si)

        self.player = player_for(self.asource)

        # Placeholder that cannot match real audio data, so the first
        # detection is always treated as new.
        self.prev_data = np.zeros([1])

        def audio_callback(data, start, end):
            # Ignore a detection whose payload equals the previous one.
            if not np.array_equal(data, self.prev_data):
                self.sendTrigger()  # send notice that audio has been detected

                print("Acoustic activity at: {0}--{1}".format(start, end))

                stamp = (start, end, self.chunk_count)

                if self.record:
                    self.saveAudio(data)

                # View every raw frame as an array of unsigned bytes.
                copied = [np.frombuffer(x, dtype=np.uint8) for x in data]

                data_rs = self.reshapeAudio(np.asarray(copied))

                self.sendAudio(data_rs, stamp)

                self.prev_data = data
                if self.PLAYBACK:
                    # Function-call form: valid on both Python 2 and 3 and
                    # consistent with the print() call above.
                    print("playing audio")
                    self.playback(data_rs)

                self.chunk_count += 1

        self.asource.open()
        self.sendTrigger(
        )  # send notice that the audio has started to be processed
        self.tokenizer.tokenize(self.asource, callback=audio_callback)
        # Terminate the process once the source is exhausted.
        sys.exit(0)
Example #25
0
    #check os system and set sample rate 48000 for Linux (Raspberry Pi)
    _os = platform.system()
    if (_os == 'Darwin') or (_os == 'Windows'):  # macOs
        sample_rate = asource.get_sampling_rate()

    # get sample width and channels from ads factory
    sample_width = asource.get_sample_width()
    channels = asource.get_channels()

    # START VALIDATOR
    validator = AudioEnergyValidator(sample_width=sample_width,
                                     energy_threshold=energy_threshold)
    tokenizer = StreamTokenizer(
        validator=validator,
        min_length=min_length,
        max_length=max_length,
        max_continuous_silence=max_continuous_silence)  #

    # LOAD PYAUDIO
    p = pyaudio.PyAudio()

    # instantiate the Memoria class
    _memoria = memoria.Memoria()

    # gui vars
    if GUI:
        root = Tk()
        display = GUI(root)

    if TRANSCRIPTION:
try:

    # We set the `record` argument to True so that we can rewind the source
    asource = ADSFactory.ads(
        filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True)

    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=65)

    # Default analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate())
    # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms
    # max_length=400 :  maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds
    # max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 10 == 300 ms
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=20,
                                max_length=400,
                                max_continuous_silence=30)

    asource.open()
    tokens = tokenizer.tokenize(asource)

    # Play detected regions back
    player = player_for(asource)

    # Rewind and read the whole signal
    asource.rewind()
    original_signal = []

    while True:
        w = asource.read()
        if w is None:
Example #27
0
    def test_init_min_3_init_max_silence_2(self):
        """Tokenize with ``init_min=3`` and ``init_max_silence=2``.

        Three tokens are expected, covering frames 5-16, 19-31 and 35-39
        (inclusive) of the input string.
        """
        tokenizer = StreamTokenizer(
            self.A_validator,
            min_length=5,
            max_length=20,
            max_continuous_silence=4,
            init_min=3,
            init_max_silence=2,
            mode=0,
        )

        data_source = StringDataSource(
            "aAaaaAaAaaAaAaaaaaaAAAAAAAAAaaaaaaaAAAAA")
        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            3,
            msg="wrong number of tokens, expected: 3, found: {0} ".format(
                len(tokens)),
        )

        # (data, start frame, end frame) for each expected token, in order.
        # Driving the checks from one table keeps every failure message in
        # sync with the value and token number actually being asserted.
        expected = (
            ("AaAaaAaAaaaa", 5, 16),
            ("AAAAAAAAAaaaa", 19, 31),
            ("AAAAA", 35, 39),
        )
        for number, (token, (exp_data, exp_start, exp_end)) in enumerate(
                zip(tokens, expected), 1):
            data = "".join(token[0])
            start = token[1]
            end = token[2]
            self.assertEqual(
                data,
                exp_data,
                msg=("wrong data for token {0}, expected: '{1}', "
                     "found: '{2}' ").format(number, exp_data, data),
            )
            self.assertEqual(
                start,
                exp_start,
                msg=("wrong start frame for token {0}, expected: {1}, "
                     "found: {2} ").format(number, exp_start, start),
            )
            self.assertEqual(
                end,
                exp_end,
                msg=("wrong end frame for token {0}, expected: {1}, "
                     "found: {2} ").format(number, exp_end, end),
            )
'''
# record = True so that we'll be able to rewind the source.
# max_time = 10: read 10 seconds from the microphone
asource = ADSFactory.ads(record=True)

validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=50)
tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=250, max_continuous_silence=30)

player = player_for(asource)
asource.open()

tokenizer.tokenize(asource, callback=echo)
'''
# Build the audio source with explicit capture parameters: 16 kHz sampling
# rate, 2-byte samples, one channel. No filename is given - presumably the
# source is the microphone (confirm against ADSFactory.ads). record=False
# means the data is not kept for rewinding (see the note in the disabled
# snippet above).
asource = ADSFactory.ads(sampling_rate=16000,
                         sample_width=2,
                         channels=1,
                         frames_per_buffer=128,
                         record=False,
                         block_dur=0.01)

# Energy-based validator sized to the source's sample width.
validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                 energy_threshold=50)
# NOTE(review): unlike the other StreamTokenizer calls visible in this file,
# no max_length is passed here - confirm the constructor provides a default
# for it, otherwise this call fails.
tokenizer = StreamTokenizer(validator=validator,
                            min_length=100,
                            max_continuous_silence=500)

asource.open()

# `echo` is the per-token callback; it is not defined in this part of the
# file.
tokenizer.tokenize(asource, callback=echo)
Example #29
0
    def test_min_5_max_10_max_continuous_silence_0(self):
        """Tokenize with ``max_continuous_silence=0``.

        Three tokens are expected, covering frames 3-7, 9-14 and 17-25
        (inclusive) of the input string.
        """
        tokenizer = StreamTokenizer(self.A_validator,
                                    min_length=5,
                                    max_length=10,
                                    max_continuous_silence=0,
                                    init_min=3,
                                    init_max_silence=3,
                                    mode=0)

        data_source = StringDataSource("aaaAAAAAaAAAAAAaaAAAAAAAAAa")

        tokens = tokenizer.tokenize(data_source)

        self.assertEqual(
            len(tokens),
            3,
            msg="wrong number of tokens, expected: 3, found: {0} ".format(
                len(tokens)))

        # (data, start frame, end frame) for each expected token, in order.
        # Driving the checks from one table keeps every failure message tied
        # to the right token number.
        expected = (
            ("AAAAA", 3, 7),
            ("AAAAAA", 9, 14),
            ("AAAAAAAAA", 17, 25),
        )
        for number, (token, (exp_data, exp_start, exp_end)) in enumerate(
                zip(tokens, expected), 1):
            data = ''.join(token[0])
            start = token[1]
            end = token[2]
            self.assertEqual(
                data,
                exp_data,
                msg="wrong data for token {0}, expected: '{1}', found: '{2}' ".
                format(number, exp_data, data))
            self.assertEqual(
                start,
                exp_start,
                msg="wrong start frame for token {0}, expected: {1}, "
                "found: {2} ".format(number, exp_start, start))
            self.assertEqual(
                end,
                exp_end,
                msg="wrong end frame for token {0}, expected: {1}, "
                "found: {2} ".format(number, exp_end, end))