def _playAudio(self, audio):
    """Play a complete in-memory WAV file on the default output device.

    Args:
        audio: Bytes of a whole WAV file (header included).

    The call blocks until every frame has been handed to the output stream.
    The stream and the PyAudio instance are released even if playback
    raises, so a failed playback does not leak audio devices.
    """
    CHUNK = 1024  # number of frames written to the stream per iteration

    with wave.open(io.BytesIO(audio), 'rb') as wf:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                data = wf.readframes(CHUNK)
                # readframes() returns b'' once the file is exhausted.
                while data:
                    stream.write(data)
                    data = wf.readframes(CHUNK)

                # Short pause before stopping the stream (kept from the
                # original; presumably lets buffered audio finish playing).
                time.sleep(0.2)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()
def generate_transcript(language_code="ro-RO"):
    """Transcribe ``cache/recording.wav`` with the Google Speech client.

    Args:
        language_code: Language tag passed to the recognizer.

    Returns:
        The response object returned by ``client.recognize``.
    """
    # Creates google client
    client = speech.SpeechClient()

    # Full path of the audio file, replace with your file name
    file_name = os.path.join(os.path.dirname(__file__), "cache/recording.wav")

    # Only the channel count is needed from the WAV header; close the reader
    # immediately instead of leaving the file handle open.
    with wave.open(file_name, "rb") as wav_file:
        ch = wav_file.getnchannels()

    # Loads the audio file into memory
    with open(file_name, "rb") as audio_file:
        content = audio_file.read()
    audio = speech.RecognitionAudio(content=content)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        audio_channel_count=ch,
        language_code=language_code,
    )

    # Sends the request to google to transcribe the audio
    response = client.recognize(request={"config": config, "audio": audio})

    return response
Ejemplo n.º 3
0
    def Process(self):
        """Feed one track of the input WAV file, sample by sample, to PerSample.

        Opens ``self.args.infile`` and stores its channel count, frame rate,
        sample width and frame count on ``self``.  ``self.args.start`` and
        ``self.args.end`` are multiplied by the frame rate to obtain the frame
        window; a negative end means "until the end of the file".  Each frame
        in the window is decoded as one signed 16-bit little-endian sample of
        track ``self.args.track`` and passed to ``self.persample.Process``
        together with its frame index.

        Exits the process with status -1 when the requested track does not
        exist.  The reader is closed on every exit path.
        """
        with wave.Wave_read(self.args.infile) as wr:
            self.numtracks = wr.getnchannels()
            self.framerate = wr.getframerate()
            self.sampwidth = wr.getsampwidth()
            self.nframes = wr.getnframes()

            if self.args.track >= self.numtracks:
                print('Error, file only has ' + str(self.numtracks) + ' tracks.')
                sys.exit(-1)

            self.persample = PerSample(self.args, self.framerate)

            # Byte offset of the selected track inside one interleaved frame.
            offset = self.args.track * self.sampwidth

            # Set up the start and finish frame numbers.
            frameEnd = int(self.args.end * self.framerate)
            if frameEnd < 0:
                frameEnd = self.nframes
            frameEnd = min(frameEnd, self.nframes)
            frameStart = int(self.args.start * self.framerate)
            frameStart = min(max(0, frameStart), frameEnd)

            # Skip everything before the window.
            if frameStart > 0:
                wr.readframes(frameStart)

            for i in range(frameStart, frameEnd):
                frame = wr.readframes(1)
                # Get one signed, 16-bit sample (low byte first).
                # NOTE(review): assumes a 2-byte sample width; other widths
                # are not checked here.
                sample = frame[offset] + frame[offset + 1] * 256
                if sample > 32767:
                    sample = sample - 65536
                self.persample.Process(i, sample)
Ejemplo n.º 4
0
 def run(self, audio):
     """Normalize the audio, decode it as 16-bit PCM and run speech-to-text."""
     wav_bytes = normalize_audio(audio)
     with wave.Wave_read(BytesIO(wav_bytes)) as reader:
         pcm = reader.readframes(reader.getnframes())
     samples = np.frombuffer(pcm, np.int16)
     return self.model.stt(audio_buffer=samples)
Ejemplo n.º 5
0
def setup():
    """Build the settings dict for a test run.

    Profiles 0-2 prompt for frequency, sampling rate and duration.  Profile 3
    takes sample rate, duration, channel count and sample width from
    ``test.wav`` located next to this file.
    """
    settings = {
        'channels': 1,
        'bit_depth': 3 if USE_24_BIT else 2,
    }
    settings['profile'] = ask(
        'Give the sound profile [0=Sine, 1=Square, 2=Saw, 3=test.wav]',
        int,
        d=0)

    if settings['profile'] == 3:
        # Everything comes from the bundled WAV file's header.
        wav_path = join_path(dirname(__file__), 'test.wav')
        with wave.Wave_read(wav_path) as wav:
            settings['sample_rate'] = wav.getframerate()
            settings['duration'] = wav.getnframes() / settings['sample_rate']
            settings['channels'] = wav.getnchannels()
            settings['bit_depth'] = wav.getsampwidth()
        return settings

    settings['frequency'] = ask('Give the note frequency [Hz]', float, d=440)
    settings['sample_rate'] = ask('Give the sampling rate [Hz]', int, d=44100)
    settings['duration'] = ask('Give the desired test duration [s]',
                               float,
                               d=5)
    return settings
Ejemplo n.º 6
0
def getTimes(song, chunk = 2048 * 4):
    """Return the times (in milliseconds) of chunks considered to be beats.

    The first column of the sample data returned by ``getData(song)`` is cut
    into consecutive, complete chunks of ``chunk`` samples.  For each chunk
    the mean spectral power (in dB) is computed; from the 21st chunk onward a
    chunk counts as a beat when its value exceeds the mean of the previous 20
    chunks.

    Args:
        song: Path (or file object) of the WAV file.
        chunk: Number of samples per analysis chunk.

    Returns:
        List of beat times in milliseconds, measured at the chunk start.
    """
    # Only the frame rate is needed from the header.
    with wave.Wave_read(song) as wav:
        rate = wav.getframerate()
    data = getData(song)
    # holds average of the powers of the last 20 chunks
    avgList = []
    # the times when a chunk is considered a beat
    timeList = []
    i = 0
    start = time.time()
    # Only complete chunks are analysed.  Checking the bound *before* the FFT
    # avoids calling rfft on an empty slice, which raises when the data length
    # is an exact multiple of the chunk size.
    while chunk * (i + 1) <= len(data):
        power = 20*np.log10(np.abs(np.fft.rfft(data[chunk * i: chunk * (1 + i), 0])))
        avg = abs(sum(power) / len(power))
        # compares the power of current chunk to the last 20
        if i > 19:
            if avg > sum(avgList) / len(avgList):
                # chunk * i / rate = time in seconds
                timeS = chunk * i / rate
                timeMS = timeS * 1000
                timeList.append(timeMS)
            avgList.pop(0)
        avgList.append(avg)
        i += 1
    end = time.time()
    print(end - start, 'seconds to run')
    return timeList
Ejemplo n.º 7
0
def BASE_WAVEFORM(ts, **kwarks):
    """Return the base waveform selected by ``profile``.

    Keyword Args:
        profile: 1 = square (default), 2 = sawtooth, 3 = contents of
            ``test.wav`` next to this file, anything else = sine.
        frequency: Note frequency in Hz for the synthetic profiles
            (default 440).
        bit_depth: Sample width in bytes of ``test.wav`` (2 or 3); required
            for profile 3.

    Args:
        ts: NumPy array of sample times in seconds (unused for profile 3).

    Raises:
        Exception: profile 3 with a bit depth other than 2 or 3.
    """
    choice = kwarks.get('profile', 1)
    if choice == 1:  # Square
        return np.sign(np.sin(2 * np.pi * ts * kwarks.get('frequency', 440)))
    elif choice == 2:  # Sawtooth
        ret = np.mod(ts, 1 / kwarks.get('frequency', 440))
        return ret / np.max(np.abs(ret)) - 0.5
    elif choice == 3:
        fp = dirname(__file__)
        with wave.Wave_read(join_path(fp, 'test.wav')) as wav:
            audio = wav.readframes(wav.getnframes())

        bit_depth = kwarks['bit_depth']
        if bit_depth == 3:
            # 24-bit little-endian PCM: place each 3-byte sample in the upper
            # three bytes of a 32-bit word, then shift right arithmetically so
            # the sign is extended.  (The previous code iterated over every
            # byte instead of every sample and padded on the high side, which
            # dropped the sign and corrupted the buffer.)
            usable = len(audio) - len(audio) % 3
            triples = np.frombuffer(audio[:usable], dtype=np.uint8)
            triples = triples.reshape(-1, 3)
            widened = np.zeros((len(triples), 4), dtype=np.uint8)
            widened[:, 1:] = triples
            audio = widened.view('<i4').ravel() >> 8
        elif bit_depth == 2:
            audio = np.frombuffer(audio, dtype='<i2')
        else:
            raise Exception('Non-supported bit depth')

        audio = audio.astype(float)

        # NOTE(review): this subtracts half the peak-to-peak range, not the
        # midpoint (max + min) / 2 - kept as-is; confirm the intended
        # normalization.
        return (audio -
                (audio.max() - audio.min()) / 2) / np.max(np.abs(audio))

    else:  # If choice is not 1, 2 or 3 the selected profile is sine wave.
        return np.sin(2 * np.pi * ts * kwarks.get('frequency', 440))
Ejemplo n.º 8
0
 def run_with_metadata(self, audio) -> Metadata:
     """Normalize the audio, decode it as 16-bit PCM and transcribe with metadata."""
     wav_bytes = normalize_audio_input(audio)
     with wave.Wave_read(BytesIO(wav_bytes)) as reader:
         pcm = reader.readframes(reader.getnframes())
     samples = np.frombuffer(pcm, np.int16)
     return self.model.sttWithMetadata(audio_buffer=samples)
 def run(self, audio):
     """Perform speech-to-text transcription"""
     stream = BytesIO(normalize_audio(audio))
     with wave.Wave_read(stream) as reader:
         frame_count = reader.getnframes()
         pcm = reader.readframes(frame_count)
     samples = np.frombuffer(pcm, np.int16)
     transcript = self.model.stt(audio_buffer=samples)
     return transcript
Ejemplo n.º 10
0
 def loadwav(fname):
     """Load a mono WAV file sampled at ``sp.sf`` as a float32 array.

     The frames are interpreted as 16-bit integers.  An AssertionError is
     raised when the sampling rate differs from ``sp.sf`` or the file is not
     single-channel.
     """
     with wave.Wave_read(fname) as reader:
         rate = reader.getframerate()
         assert rate == sp.sf, 'sampling rate is different (' + fname + ')'
         channel_count = reader.getnchannels()
         assert channel_count == 1, 'channel is not 1 (' + fname + ')'
         raw = reader.readframes(reader.getnframes())
         samples = numpy.frombuffer(raw, numpy.int16)
         return samples.astype(numpy.float32)
Ejemplo n.º 11
0
 def load_waveform_from_wave_file(self, filename, dtype=float):
     """Open a wave file and return its samples and sampling rate.

     The frames are interpreted as signed 16-bit little-endian integers.
     For multi-channel files the returned signal is interleaved
     (``nframes * nchannels`` values).

     Args:
         filename: Path or file object of the WAV file.
         dtype: NumPy dtype of the returned signal.  The default is the
             builtin ``float``, which is what the removed ``np.float`` alias
             referred to.

     Returns:
         Tuple ``(sig, fs)``: the sample array and the frame rate in Hz.
     """
     with wave.Wave_read(filename) as wr:
         fs = wr.getframerate()
         raw = wr.readframes(wr.getnframes())
     # '<i2' decodes WAV's little-endian 16-bit samples on any host and works
     # for any channel count (struct's '%dh' % nframes did not).
     sig = np.frombuffer(raw, dtype='<i2').astype(dtype)
     return sig, fs
Ejemplo n.º 12
0
def is_valid_wav(filename):
    """Return True when *filename* is an uncompressed 16 kHz, 16-bit mono WAV.

    Any error while opening or parsing the file (missing file, not a WAV
    file, ...) yields False.  The file handle is always closed.
    """
    try:
        with wave.Wave_read(filename) as wav_file:
            # check the sampling rate and number bits of the WAV
            return (wav_file.getframerate() == 16000
                    and wav_file.getsampwidth() == 2
                    and wav_file.getnchannels() == 1
                    and wav_file.getcomptype() == 'NONE')
    except Exception:
        return False
Ejemplo n.º 13
0
    def run(self, audio) -> str:
        """Normalize the received audio and pass it to the model for transcription.

        The normalized bytes are parsed as a WAV file and all frames are
        handed to the model as 16-bit integers.  Returns the model's result.
        """
        wav_bytes = normalize_audio_input(audio)
        with wave.Wave_read(BytesIO(wav_bytes)) as reader:
            pcm = reader.readframes(reader.getnframes())
        samples = np.frombuffer(pcm, np.int16)
        return self.model.stt(audio_buffer=samples)
Ejemplo n.º 14
0
def testWaveFile(filename):
    w = wave.Wave_read(filename)
    bitrate = w.getframerate()
    channels = w.getnchannels()
    bits = w.getsampwidth()*8
    if not bitrate==8000 or not channels==1 or not bits==16:
        newFilename = filename[:-4] + "_8000.wav"
        returnValue = os.system(SOXCOMMAND.format(filename, newFilename))    
        if returnValue:
            raise(SOX_Exception("Something went wrong calling sox: SOXCOMMAND.format(filename, newFilename"))
        filename = newFilename
    return(filename)
Ejemplo n.º 15
0
def read_wave_file(file, data_min_proportion=1.0):
    """
    Reads a wave file and returns it as a NumPy array.

    Args:
       file:  Either a filename or a file object
       data_min_proportion:  Minimum fraction of the frame count announced
            in the header that must actually be readable.

    Returns a 2-tuple of:
         (samprate, data)
    where samprate is the sampling rate as in integer (e.g. 16000), and `data`
    is a numpy array with dtype int16 and shape (num_channels, num_samples).


    Raises:
      RuntimeError: if the file is compressed, is not 16-bit, or fewer than
                 `data_min_proportion` of the expected frames could be read.
                 (Note: if at least `data_min_proportion` of the
                 expected data was read, it will succeed even if
                 the file was truncated.)
      wave.Error: whatever errors the wave module encountered
      OsError (via wave module), if a file could not be opened.
    """

    # The context manager closes the reader on every path, including errors.
    with wave.Wave_read(file) as wave_reader:
        (nchannels, sampwidth, framerate, nframes, comptype,
         compname) = wave_reader.getparams()

        if comptype != 'NONE':
            raise RuntimeError(
                "Wave file has compression, which is unsupported: comptype={},"
                "compname={}".format(comptype, compname))
        if sampwidth != 2:
            raise RuntimeError(
                "Wave file has sample width of {}, expected 2.".format(
                    sampwidth))

        data_as_bytes = wave_reader.readframes(nframes)

    frame_size = sampwidth * nchannels
    nframes_read = len(data_as_bytes) // frame_size

    assert nframes_read <= nframes
    if nframes_read < data_min_proportion * nframes:
        raise RuntimeError(
            "Reading data from {0}, read too little data: {1} != {2} "
            "(min allowed proportion: {3})".format(file, nframes_read, nframes,
                                                   data_min_proportion))

    # WAV samples are little-endian regardless of the host byte order.  Drop a
    # trailing partial frame (truncated file) so the reshape below is valid.
    array = np.frombuffer(data_as_bytes[:nframes_read * frame_size],
                          np.dtype('<i2'))
    # order='F' because the frame has a higher stride than the channel.
    return (framerate, array.reshape((nchannels, nframes_read), order='F'))
Ejemplo n.º 16
0
def build_data(wav, begin=None, end=None):
    """Load a WAV file as int16 samples and slice or window it.

    Args:
        wav: Path or file object of the WAV file.
        begin: Start of the excerpt in seconds (optional).
        end: End of the excerpt in seconds (optional).

    Returns:
        When both ``begin`` and ``end`` are given, the samples between them,
        computed with a fixed rate of 16000 samples per second.  Otherwise a
        list of windows of up to 480 samples taken every 160 samples.
    """
    with wave.Wave_read(wav) as wav_in_file:
        dstr = wav_in_file.readframes(wav_in_file.getnframes())
    # frombuffer replaces the deprecated binary mode of np.fromstring; the
    # copy keeps the result writable, as before.
    data = np.frombuffer(dstr, np.int16).copy()
    if begin is not None and end is not None:
        # NOTE(review): 16000 is hard-coded rather than read from the header.
        return data[begin * 16000:end * 16000]
    return [data[i:i + 480] for i in range(0, len(data) - 100, 160)]
Ejemplo n.º 17
0
def is_valid_wav(filename):
    """Tell whether *filename* is an uncompressed 16 kHz, 16-bit, mono WAV file.

    Returns False for any file that cannot be opened or parsed.  The reader
    is closed on every path.
    """
    required = (16000, 2, 1, 'NONE')
    try:
        with wave.Wave_read(filename) as reader:
            found = (reader.getframerate(), reader.getsampwidth(),
                     reader.getnchannels(), reader.getcomptype())
            return found == required
    except Exception:
        return False
Ejemplo n.º 18
0
def fixWaveFile(filename):
    """Return a path to an 8 kHz, mono, 16-bit version of *filename*.

    When the file already has that format its own path is returned.
    Otherwise sox is run through ``SOXCOMMAND`` to write
    ``<name>_8000.wav`` and that path is returned.

    Raises:
        SOX_Exception: the sox command exited with a non-zero status.
    """
    # Close the reader before sox touches the file.
    with wave.Wave_read(filename) as w:
        bitrate = w.getframerate()
        channels = w.getnchannels()
        bits = w.getsampwidth() * 8
    if bitrate != 8000 or channels != 1 or bits != 16:
        newFilename = os.path.splitext(filename)[0] + "_8000.wav"
        # NOTE(review): the command runs through the shell; file names with
        # shell metacharacters are not escaped here.
        command = SOXCOMMAND.format(filename, newFilename)
        returnValue = os.system(command)
        if returnValue:
            # Show the command that actually ran (the old message contained
            # the literal text "SOXCOMMAND.format(...)").
            raise SOX_Exception(
                "Something went wrong calling sox:\n"
                "{}\n"
                "Is sox installed?  If not, just make sure that you've saved "
                "8kHz mono wav files.".format(command))
        filename = newFilename
    return filename
Ejemplo n.º 19
0
def fixWaveFile(filename):
    """Return a path to an 8 kHz, mono, 16-bit version of *filename*.

    When the file already has that format its own path is returned.
    Otherwise sox is run through ``SOXCOMMAND`` to write
    ``<name>_8000.wav`` and that path is returned.  The error message is in
    Polish.

    Raises:
        SOX_Exception: the sox command exited with a non-zero status.
    """
    # Close the reader before sox touches the file.
    with wave.Wave_read(filename) as w:
        bitrate = w.getframerate()
        channels = w.getnchannels()
        bits = w.getsampwidth() * 8
    if bitrate != 8000 or channels != 1 or bits != 16:
        newFilename = os.path.splitext(filename)[0] + "_8000.wav"
        # NOTE(review): the command runs through the shell; file names with
        # shell metacharacters are not escaped here.
        command = SOXCOMMAND.format(filename, newFilename)
        returnValue = os.system(command)
        if returnValue:
            # Show the command that actually ran (the old message contained
            # the literal text "SOXCOMMAND.format(...)").
            raise SOX_Exception(
                "Nie udało się wywołać programu sox:\n"
                "{}\n"
                "Czy program sox jest zainstalowany? Sprawdź też, czy pliki "
                "dźwiękowe mają format wav 8kHz mono.".format(command))
        filename = newFilename
    return filename
Ejemplo n.º 20
0
    def convert(self):
        """Re-write the 101 numbered WAV files of the selected language.

        For i in 0..100, ``Languages/<language>/NNNN.wav`` (four-digit,
        zero-padded) is copied frame-for-frame to
        ``Languages/<language>/work/<i>.wav``.  The base directory is the
        parent of ``self.pathname``; the language comes from the UI combo box.

        A TypeError is reported on stdout; other exceptions propagate.  Both
        wave files are closed even when a copy fails.
        """
        print("convert")
        try:
            pathname = os.path.normpath(self.pathname + '/../')
            language = self.ui.comboBox_language.currentText()
            for i in range(0, 101):
                source = os.path.join(pathname, 'Languages', language,
                                      '{:04d}.wav'.format(i))
                target = os.path.join(pathname, 'Languages', language, 'work',
                                      str(i) + '.wav')
                with wave.Wave_read(source) as file, \
                        wave.Wave_write(target) as file1:
                    file1.setparams(file.getparams())
                    file1.writeframes(file.readframes(file.getnframes()))

        except TypeError as e:
            print("wavefile : ", e)
Ejemplo n.º 21
0
def crop_file(input_wav, output_wav, start_time, end_time):
    """Copy the frames between two times of a WAV file into a new WAV file.

    Args:
        input_wav: Path or file object of the source WAV file.
        output_wav: Path or file object of the WAV file to write.
        start_time: Start of the region in seconds (number or numeric string).
        end_time: End of the region in seconds (number or numeric string).

    The frame at ``int(end_time * framerate)`` is included.  The region is
    clipped to the extent of the input; an empty region produces a WAV file
    without frames.
    """
    with wave.Wave_read(input_wav) as wav_in_file, \
            wave.Wave_write(output_wav) as wav_out_file:
        framerate = wav_in_file.getframerate()
        total_frames = wav_in_file.getnframes()

        start_sample = max(0, int(float(start_time) * framerate))
        end_sample = min(int(float(end_time) * framerate), total_frames - 1)
        frame_count = max(0, end_sample - start_sample + 1)

        # nframes must be an integer frame count: the previous float duration
        # made the header write fail in struct.pack.
        wav_out_file.setparams(
            (wav_in_file.getnchannels(), wav_in_file.getsampwidth(),
             framerate, frame_count, 'NONE', 'noncompressed'))

        if frame_count:
            # Jump to the region and copy it in one read instead of walking
            # the whole file one frame at a time.
            wav_in_file.setpos(start_sample)
            wav_out_file.writeframes(wav_in_file.readframes(frame_count))
Ejemplo n.º 22
0
    def play_audio(self,
                   audio_data_src: http.client.HTTPResponse or io.BytesIO,
                   format: str = 'wav') -> None:
        """
        Plays the audio read from audio_data_src.

        Only ``format == 'wav'`` is handled; any other format does nothing.
        Every exception is passed to ``ErrorLogger`` instead of propagating.
        The output stream and the PyAudio instance are released even when
        playback fails.

        Args:
            audio_data_src: binary audio source (file-like object)
            format: audio format
        """
        try:
            if format == 'wav':
                # define stream chunk (frames per write)
                chunk = 1024

                # open the wav data
                with wave.Wave_read(audio_data_src) as f:
                    # instantiate PyAudio
                    p = pyaudio.PyAudio()
                    try:
                        # open stream
                        stream = p.open(format=p.get_format_from_width(
                            f.getsampwidth()),
                                        channels=f.getnchannels(),
                                        rate=f.getframerate(),
                                        output=True)
                        try:
                            # play stream until the source is exhausted
                            data = f.readframes(chunk)
                            while data:
                                stream.write(data)
                                data = f.readframes(chunk)

                            # stop stream
                            stream.stop_stream()
                        finally:
                            stream.close()
                    finally:
                        # close PyAudio
                        p.terminate()
        except Exception as e:
            ErrorLogger(__file__, e)
Ejemplo n.º 23
0
def read_wav_file(file: Text) -> Tuple[int, np.ndarray]:
    """
    Reads a wave file and returns it as a NumPy array.

    Args:
       file:  Filepath to a .wav file.

    Returns: (samprate, data) where samprate is the sampling frequency and data
    is a numpy array with dtype int16 and shape (num_channels, num_samples).

    Raises:
      RuntimeError: if the file is compressed or is not 16-bit.
      wave.Error: whatever errors the wave module encountered
      OsError (via wave module), if a file could not be opened.
    """

    # The context manager closes the reader on every path, including errors.
    with wave.Wave_read(file) as wave_reader:
        (nchannels, sampwidth, framerate, nframes, comptype,
         compname) = wave_reader.getparams()
        if comptype != 'NONE':
            raise RuntimeError(
                "Wave file has compression, which is unsupported: comptype={},"
                "compname={}".format(comptype, compname))
        # Expect 16-bit magnitude sampling.
        if sampwidth != 2:
            raise RuntimeError(
                "Wave file has sample width of {}, expected 2.".format(
                    sampwidth))
        data_as_bytes = wave_reader.readframes(nframes)

    frame_size = sampwidth * nchannels
    nframes_read = len(data_as_bytes) // frame_size
    assert nframes_read <= nframes
    # WAV samples are little-endian regardless of the host byte order.  Drop a
    # trailing partial frame (truncated file) so the reshape below is valid.
    array = np.frombuffer(data_as_bytes[:nframes_read * frame_size],
                          np.dtype('<i2'))
    # order='F' because the frame has a higher stride than the channel.
    return framerate, array.reshape((nchannels, nframes_read), order='F')
Ejemplo n.º 24
0
Archivo: util.py Proyecto: SRHerzog/ut
def wf_and_sr_from_filepath(filepath, **kwargs):
    """Load a waveform and its sample rate from an audio file.

    WAV files are read with ``sf.read``; everything else with
    ``librosa.load``.

    Keyword Args:
        ensure_mono: When true (default), the waveform is passed through
            ``ensure_mono`` before being returned.
        offset_s: Start offset in seconds.
        duration: Duration in seconds.
        Remaining keyword arguments are forwarded to the reader (for WAV
        files, only those accepted by ``sf.read``).

    Returns:
        Tuple ``(wf, sr)``: the waveform and the sample rate.
    """
    must_ensure_mono = kwargs.pop('ensure_mono', True)

    if is_wav_file(filepath):
        kwargs = dict({'always_2d': False}, **kwargs)
        if 'offset_s' in kwargs or 'duration' in kwargs:
            # sf.read is addressed in frames, so seconds are converted using
            # the file's own frame rate.  Close the header reader right away.
            with wave.Wave_read(filepath) as wav_header:
                sample_rate = wav_header.getframerate()
            start = int(round(kwargs.pop('offset_s', 0) * sample_rate))
            kwargs['start'] = start
            duration = kwargs.pop('duration', None)
            if duration is not None:
                kwargs['stop'] = int(start + round(duration * sample_rate))

        kwargs = filter_kwargs_to_func_arguments(sf.read, kwargs)
        wf, sr = sf.read(filepath, **kwargs)
    else:
        kwargs['offset'] = kwargs.pop('offset_s', 0.0)
        wf, sr = librosa.load(filepath, **kwargs)

    if must_ensure_mono:
        wf = ensure_mono(wf)
    return wf, sr
Ejemplo n.º 25
0
    def FileSelected(self, filename):
        """
        Set a sound file.

        Loads the file into a new media control, stores its length and
        updates the buttons, the playback slider and the refresh timer.

        @param filename (string) is an audio file name (a wave is expected).
        @return False when loading failed, None otherwise.

        """
        # we already opened the same file
        if filename == self._filename and self._mediaplayer is not None:
            logging.info(' ... SndPlayer: file %s was already opened. [WARNING]' % (filename))
            return

        try:
            m = wx.media.MediaCtrl(self, style=wx.NO_BORDER)
            m.Load(filename)
            self._length = m.Length()
            if self._length == 0: # **** BUG of the MediaPlayer! ****
                # Fall back to the WAV header to compute the length in
                # milliseconds; the reader is closed right after use.
                import wave
                with wave.Wave_read(filename) as w:
                    self._length = int(1000 * float(w.getnframes())/float(w.getframerate()))
            logging.info(" ... File %s successfully loaded. [  OK  ]" % (filename))
        except Exception as e:
            logging.info(" ... File %s not loaded.  [ ERROR ]" % (filename))
            ShowInformation(self, self._prefs, 'Error loading: '+filename+': '+str(e), style=wx.ICON_ERROR)
            return False

        # set mediaplayer with the new one
        self._filename = filename
        self._mediaplayer = m
        self.ActivateButtons(True)
        self._offsets = (0,self._length)
        if self._playbackSlider is not None:
            self._playbackSlider.SetRange(0, self._length)
            self._playbackSlider.SetTickFreq(int(self._length/10), 1)

        self._timer.Start(self._refreshTimer)
        self.Refresh()
    # NOTE(review): the enclosing function header is not visible here; names
    # such as args, sox_bin, hcopy_bin, htk_config, easy_call and
    # phoneme_classifier_pad are presumably defined earlier - confirm.

    # Classifier hyper-parameters, kept as strings because they are only
    # interpolated into the model file name below.
    phoneme_classifier_SIGMA = "4.3589"
    phoneme_classifier_C = "1"
    phoneme_classifier_B = "0.8"
    phoneme_classifier_epochs = "1"
    phoneme_classifier_model = "models/pa_phoeneme_frame_based.C_%s.B_%s.sigma_%s.pad_%s.epochs_%s.model" % \
                               (phoneme_classifier_C, phoneme_classifier_B, phoneme_classifier_SIGMA,
                                phoneme_classifier_pad, phoneme_classifier_epochs)

    # generate intermediate files from a temp filename
    # NOTE(review): tmp_fd is not closed in the visible part of this code.
    (tmp_fd, tmp_filename) = tempfile.mkstemp()
    wav_filename = tmp_filename + ".16kHz.wav"
    mfc_filename = tmp_filename + ".mfc"
    mfcc_tmp_file = tmp_filename + ".mfc_delta"

    # read Wav file parameters (only the sampling rate is needed)
    wave_file = wave.Wave_read(args.wav_filename)
    wave_sampling_rate = wave_file.getframerate()
    wave_file.close()

    # converts WAV to 16kHz with sox when the input has a different rate;
    # rm_wav_file records whether a temporary WAV was created
    if wave_sampling_rate != 16000:
        cmd = "%s %s -r 16k %s remix 1" % (sox_bin, args.wav_filename, wav_filename)
        easy_call(cmd)
        rm_wav_file = True
    else:
        wav_filename = args.wav_filename
        rm_wav_file = False

    # extract MFCC features using HCopy utility
    cmd_params = "%s -C %s %s %s" % (hcopy_bin, htk_config, wav_filename, mfc_filename)
    easy_call(cmd_params)
Ejemplo n.º 27
0
def unpackMono(waveFile):
    """Return the samples of an 8-bit mono WAV file as a list of ints (0-255).

    Args:
        waveFile: Path or file object of the WAV file.

    Raises:
        struct.error: the data is not exactly one byte per frame (for
            example the file is not 8-bit mono, or it is truncated).
    """
    with wave.Wave_read(waveFile) as w:
        frame_count = w.getnframes()
        frames = w.readframes(frame_count)
    # One read and one unpack instead of a call pair per frame.
    return list(unpack("%dB" % frame_count, frames))
Ejemplo n.º 28
0
playsound('indian.wav')

# The WAV file says 'sorry!'.
# Let's try sorry.html.

url = f"{standard_url}sorry.html"
response = requests.get(url, auth=HTTPBasicAuth('butter', 'fly'))
page_contents = BeautifulSoup(response.text, 'html.parser')

print(page_contents)
# Page text: "what are you apologizing for?"

# Let's play with the wave module.
import wave

indian = wave.Wave_read('indian.wav')
# (A stray apostrophe after the channel count has been removed from the
# printed text.)
print(f'''Number of channels: {indian.getnchannels()}
Sample width: {indian.getsampwidth()}
Frame rate: {indian.getframerate()}
Number of frames: {indian.getnframes()}''')

# Tip: indian.getparams() returns all of these values at once.

new = wave.Wave_write('new.wav')
new.setframerate(11025 * 2)  # increase frame rate
new.setsampwidth(1)  # decrease sample width
new.setnframes(55788)
new.setnchannels(1)

print(f'''Number of channels: {new.getnchannels()}
Sample width: {new.getsampwidth()}
Ejemplo n.º 29
0
    def run(self, musicbrainzid, fname):
        """Render waveform, spectrum and inverse-MFCC images per zoom level.

        For every zoom level in ``self._zoom_levels`` the recording's WAV
        file is cut into pieces of ``framerate * zoom`` frames.  Each piece is
        written to a temporary WAV file and rendered with
        ``w2png.genimages``.  The last, shorter piece gets a proportionally
        narrower image.

        Args:
            musicbrainzid: Id used to obtain the WAV file name from ``util``.
            fname: Not used by this method; kept for interface compatibility.

        Returns:
            Dict with keys ``waveform<zoom>``, ``spectrum<zoom>`` and
            ``inv_mfcc_spectrum<zoom>`` (each a list of PNG byte strings, one
            per piece) plus ``smallfull`` from ``self.make_mini``.

        Temporary files and WAV readers are released even when rendering
        fails; the source WAV is removed when ``util`` reports it was created
        for this call.
        """
        wavfname, created = util.docserver_get_wav_filename(musicbrainzid)

        panelWidth = 900  # pixels
        panelHeight = 255  # pixels
        zoomlevels = self._zoom_levels  # seconds
        # The namedtuple class itself is used as a plain attribute holder.
        options = coll.namedtuple(
            'options',
            'image_height fft_size image_width f_min f_max scale_exp pallete')
        options.image_height = panelHeight
        options.fft_size = self._fft_size
        options.f_min = self._f_min
        options.f_max = self._f_max
        options.pallete = self._pallete
        options.scale_exp = self._scale_exp

        ret = {}
        try:
            for zoom in zoomlevels:
                # At the beginning of each zoom level we reset the image_width
                # since we are modifying it at the end of the last zoom level
                options.image_width = panelWidth

                wfname = "waveform%s" % zoom
                specname = "spectrum%s" % zoom
                inv_mfcc_name = "inv_mfcc_spectrum%s" % zoom
                wfdata = []
                specdata = []
                inv_mfcc_data = []

                with wave.Wave_read(wavfname) as wvFile:
                    framerate = wvFile.getframerate()
                    totalframes = wvFile.getnframes()

                    # We want this many frames per file at this zoom level.
                    framesperimage = framerate * zoom

                    sumframes = 0
                    while sumframes < totalframes:
                        if sumframes + framesperimage > totalframes:
                            # Last piece: shrink the image proportionally.
                            remaining_frames = (totalframes - sumframes)
                            options.image_width = options.image_width * remaining_frames / framesperimage
                        else:
                            remaining_frames = framesperimage

                        fp, smallname = tempfile.mkstemp(".wav")
                        os.close(fp)
                        try:
                            data = wvFile.readframes(remaining_frames)
                            with wave.open(smallname, "wb") as wavout:
                                # This will set nframes, but writeframes
                                # resets it
                                wavout.setparams(wvFile.getparams())
                                wavout.writeframes(data)
                            sumframes += framesperimage

                            specio = io.BytesIO()
                            # Set the name attr so that PIL gets the filetype
                            # hint
                            specio.name = "spec.png"
                            wavio = io.BytesIO()
                            wavio.name = "wav.png"
                            in_mfcc_io = io.BytesIO()
                            in_mfcc_io.name = "melspec.png"

                            w2png.genimages(smallname, wavio, specio,
                                            in_mfcc_io, options)
                        finally:
                            # Remove the piece even if rendering failed.
                            os.unlink(smallname)

                        specdata.append(specio.getvalue())
                        wfdata.append(wavio.getvalue())
                        inv_mfcc_data.append(in_mfcc_io.getvalue())

                ret[wfname] = wfdata
                ret[specname] = specdata
                ret[inv_mfcc_name] = inv_mfcc_data

            ret["smallfull"] = self.make_mini(wavfname)
        finally:
            if created:
                os.unlink(wavfname)

        return ret
Ejemplo n.º 30
0
    def run(self):
        """Run Rhino intent inference over the audio track of ``self._video_path``.

        Frames are read from the WAV file returned by ``Video_to_Audio`` and
        fed to Rhino; recognised intents are printed, written to a ``.txt``
        file next to the audio and collected as CSV rows that are written to
        ``sw_data_new.csv`` when the read loop ends.
        """

        def _frame_index_to_sec(frame_index):
            # Convert a frame index to seconds, minus a fixed one-second offset.
            return (float(frame_index * rhino.frame_length) /
                    float(rhino.sample_rate)) - float(1)

        """
         Creates an input audio stream, initializes wake word detection (Porcupine) and speech to intent (Rhino)
         engines, and monitors the audio stream for occurrences of the wake word and then infers the intent from speech
         command that follows.
         """

        porcupine = None
        rhino = None
        pa = None
        audio_stream = None

        # Starts as True and is reset to True after every inference below, so
        # the wake-word branch in the loop is never taken.
        wake_phrase_detected = True
        intent_extraction_is_finalized = False
        Apath = Video_to_Audio(self._video_path)
        wf = wave.Wave_read(Apath)
        # NOTE(review): Video_to_Audio is called a second time here, and wf is
        # not closed anywhere in this method.
        ww, sr = soundfile.read(Video_to_Audio(self._video_path))
        print(len(ww))
        try:
            porcupine = Porcupine(
                library_path=self._porcupine_library_path,
                model_file_path=self._porcupine_model_file_path,
                keyword_file_paths=[self._porcupine_keyword_file_path],
                sensitivities=[0.5],
            )

            rhino = Rhino(
                library_path=self._rhino_library_path,
                model_path=self._rhino_model_file_path,
                context_path=self._rhino_context_file_path,
                sensitivity=0.6,
            )

            print()
            print(
                "****************************** context ******************************"
            )
            print(rhino.context_info)
            print(
                "*********************************************************************"
            )
            print()

            pa = pyaudio.PyAudio()

            audio_stream = pa.open(
                rate=porcupine.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=porcupine.frame_length,
                input_device_index=self._input_device_index,
            )

            # Index of the frame currently being processed.
            test = 0
            Tpath = Apath.replace("wav", "txt")
            f = open(Tpath, "w")

            ouput = ""
            # NOTE(review): initialised as "classtr" but read and written as
            # "classstr" everywhere below - looks like a typo; confirm.
            classtr = ""
            startcount = 0
            endcount = 0
            cango = 1
            checkfirst = 0
            data_csv = [["Class_num", "Start_time", "End_time"]]
            ClassNum = None
            Start_time = None
            Start_time2 = None
            ClassNum2 = None
            rm = None
            # NOTE: This is true now and will be correct possibly forever. If it changes the logic below need to change.
            assert porcupine.frame_length == rhino.frame_length
            try:
                while True:

                    date = wf.readframes(porcupine.frame_length)
                    # The microphone data read here is overwritten by the
                    # file data unpacked just below.
                    pcm = audio_stream.read(porcupine.frame_length,
                                            exception_on_overflow=False)

                    pcm = struct.unpack_from("h" * porcupine.frame_length,
                                             date)

                    if self._output_path is not None:
                        self._recorded_frames.append(pcm)

                    if not wake_phrase_detected:
                        wake_phrase_detected = porcupine.process(pcm)

                        if wake_phrase_detected:
                            print("detected wake phrase")
                    elif not intent_extraction_is_finalized:
                        intent_extraction_is_finalized = rhino.process(pcm)

                    else:

                        if rhino.is_understood():
                            cango = 1
                            intent, slot_values = rhino.get_intent()
                            print()
                            if intent == "EndWork":

                                endcount += 1
                                classstr = " - %s" % _frame_index_to_sec(test)

                            else:
                                checkfirst += 1
                                startcount += 1
                                endcount = 0
                                for slot, value in slot_values.items():
                                    print("%s: %s" % (slot, value))
                                    classstr = ("%s: %s" % (slot, value)) + (
                                        " start time is %s" %
                                        _frame_index_to_sec(test))
                                    # Keep the previous start when a second
                                    # start arrives without an end in between.
                                    if startcount == 2:
                                        Start_time2 = Start_time
                                        ClassNum2 = ClassNum
                                    Start_time = _frame_index_to_sec(test)
                                    ClassNum = value
                            print()

                            print("intent : %s at time: %f" %
                                  (intent, _frame_index_to_sec(test)))
                            print()
                        else:
                            print("didn't understand the command")
                            cango = 0

                        rhino.reset()
                        wake_phrase_detected = True
                        intent_extraction_is_finalized = False
                        print(startcount, endcount)
                        print(ouput)

                        if cango:
                            if endcount == 1 and startcount == 0:
                                # End without a matching start.
                                ouput = classstr
                                f.write("-1 class end at" + ouput + "\n")
                                endcount = 0
                                ouput = ""
                                data_csv.append(
                                    ["-1", "-1",
                                     _frame_index_to_sec(test)])
                            elif ouput == "" and endcount == 0 and startcount == 1:
                                # First start: remember it until its end arrives.
                                ouput = classstr

                            elif ouput != "" and endcount == 1:
                                # Start followed by end: emit the complete row.
                                try:
                                    data_csv.remove(rm)
                                except:
                                    pass
                                data_csv.append([
                                    ClassNum, Start_time,
                                    _frame_index_to_sec(test)
                                ])
                                ouput += classstr
                                endcount = 0
                                startcount = 0
                                f.write(ouput + "\n")
                                ouput = ""
                            elif endcount == 0 and startcount == 2:
                                # Two starts in a row: rows get "-1" as end time.
                                if checkfirst == 2:
                                    data_csv.append(
                                        [ClassNum2, Start_time2, "-1"])

                                    f.write(ouput + "\n")
                                data_csv.append([ClassNum, Start_time, "-1"])
                                rm = [ClassNum, Start_time, "-1"]
                                ouput = classstr
                                f.write(ouput + "\n")
                                startcount = 1

                    test += 1
            except:
                # NOTE(review): this bare except is the only exit from the
                # loop; presumably it fires when the WAV file runs out of
                # frames, but it also hides every other error - confirm.
                print("EOF")
                print(_frame_index_to_sec(test))
                data_csv.append(["Maybe miss", classstr, classstr])
                f.write("Могла быть упущенная метка : %s" % classstr)
                # The name f is rebound to the CSV file here; the text file
                # opened above is not explicitly closed.
                with open("sw_data_new.csv", "w") as f:
                    writer = csv.writer(f)
                    for row in data_csv:
                        writer.writerow(row)

        except KeyboardInterrupt:
            print("stopping ...")

        finally:
            if porcupine is not None:
                porcupine.delete()

            if rhino is not None:
                rhino.delete()

            if audio_stream is not None:
                audio_stream.close()

            if pa is not None:
                pa.terminate()

            # NOTE(review): porcupine.sample_rate is read below after
            # porcupine.delete(), and porcupine may still be None - confirm.
            if self._output_path is not None and len(
                    self._recorded_frames) > 0:
                recorded_audio = np.concatenate(self._recorded_frames,
                                                axis=0).astype(np.int16)
                soundfile.write(
                    os.path.expanduser(self._output_path),
                    recorded_audio,
                    samplerate=porcupine.sample_rate,
                    subtype="PCM_16",
                )