def _playAudio(self, audio):
    CHUNK = 1024
    f = io.BytesIO()
    f.write(audio)
    f.seek(0)
    wf = wave.Wave_read(f)
    p = pyaudio.PyAudio()
    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True)
    data = wf.readframes(CHUNK)
    while data != b'':
        stream.write(data)
        data = wf.readframes(CHUNK)
    time.sleep(0.2)
    stream.stop_stream()
    stream.close()
    p.terminate()
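# Hypothetical driver for the method above (a sketch, not from the source):
# `player` stands for an instance of the enclosing class and 'hello.wav' is
# an illustrative file name; the method expects a complete in-memory WAV.
with open('hello.wav', 'rb') as fh:
    player._playAudio(fh.read())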
def generate_transcript(language_code="ro-RO"):
    # Creates a Google client
    client = speech.SpeechClient()
    # Full path of the audio file; replace with your file name
    file_name = os.path.join(os.path.dirname(__file__), "cache/recording.wav")
    wav_file = wave.Wave_read(file_name)
    ch = wav_file.getnchannels()
    # Loads the audio file into memory
    with io.open(file_name, "rb") as audio_file:
        content = audio_file.read()
    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        audio_channel_count=ch,
        language_code=language_code,
    )
    # Sends the request to Google to transcribe the audio
    response = client.recognize(request={"config": config, "audio": audio})
    return response
def Process(self):
    wr = wave.Wave_read(self.args.infile)
    self.numtracks = wr.getnchannels()
    self.framerate = wr.getframerate()
    self.sampwidth = wr.getsampwidth()
    self.nframes = wr.getnframes()
    if self.args.track >= self.numtracks:
        print('Error, file only has ' + str(self.numtracks) + ' tracks.')
        sys.exit(-1)
    self.persample = PerSample(self.args, self.framerate)
    offset = self.args.track * self.sampwidth
    # Set up the start and finish frame numbers.
    frameEnd = int(self.args.end * self.framerate)
    if frameEnd < 0:
        frameEnd = self.nframes
    frameEnd = min(frameEnd, self.nframes)
    frameStart = int(self.args.start * self.framerate)
    frameStart = max(0, frameStart)
    frameStart = min(frameStart, frameEnd)
    if frameStart > 0:
        wr.readframes(frameStart)
    for i in range(frameStart, frameEnd):
        frame = wr.readframes(1)
        # Get one signed, 16-bit sample
        sample = frame[offset] + frame[offset + 1] * 256
        if sample > 32767:
            sample = sample - 65536
        self.persample.Process(i, sample)
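# The manual little-endian decode in the loop above can be cross-checked
# against the standard library; a sketch of the equivalent (assumes
# sampwidth == 2, as the loop does):
import struct

def decode_sample(frame, offset):
    # Signed 16-bit little-endian sample starting at byte `offset`.
    return struct.unpack_from('<h', frame, offset)[0]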
def run(self, audio):
    audio = normalize_audio(audio)
    audio = BytesIO(audio)
    with wave.Wave_read(audio) as wav:
        audio = np.frombuffer(wav.readframes(wav.getnframes()), np.int16)
    result = self.model.stt(audio_buffer=audio)
    return result
def setup():
    settings = {}
    settings['channels'] = 1
    settings['bit_depth'] = 3 if USE_24_BIT else 2
    settings['profile'] = ask(
        'Give the sound profile [0=Sine, 1=Square, 2=Saw, 3=test.wav]', int, d=0)
    if settings['profile'] != 3:
        settings['frequency'] = ask('Give the note frequency [Hz]', float, d=440)
        settings['sample_rate'] = ask('Give the sampling rate [Hz]', int, d=44100)
        settings['duration'] = ask('Give the desired test duration [s]', float, d=5)
    else:
        fp = dirname(__file__)
        with wave.Wave_read(join_path(fp, 'test.wav')) as wav:
            settings['sample_rate'] = wav.getframerate()
            settings['duration'] = wav.getnframes() / settings['sample_rate']
            settings['channels'] = wav.getnchannels()
            settings['bit_depth'] = wav.getsampwidth()
    return settings
def getTimes(song, chunk=2048 * 4):
    wav = wave.Wave_read(song)
    rate = wav.getframerate()
    data = getData(song)
    # holds average of the powers of the last 20 chunks
    avgList = []
    # returns the times when it is considered a beat
    timeList = []
    i = 0
    start = time.time()
    # goes through all the chunks in the song
    while True:
        power = 20 * np.log10(np.abs(np.fft.rfft(data[chunk * i: chunk * (1 + i), 0])))
        freq = np.linspace(0, rate / 2.0, len(power))
        if (chunk * (i + 1)) > len(data):
            break
        avg = abs(sum(power) / len(power))
        # compares the power of current chunk to the last 20
        if i > 19:
            if avg > sum(avgList) / len(avgList):
                # chunk * i / rate = time in seconds
                timeS = chunk * i / rate
                timeMS = timeS * 1000
                timeList.append(timeMS)
            avgList.pop(0)
        avgList.append(avg)
        i += 1
    end = time.time()
    print(end - start, 'seconds to run')
    wav.close()
    return timeList
def BASE_WAVEFORM(ts, **kwarks):
    choice = kwarks.get('profile', 1)
    if choice == 1:
        # Square
        return np.sign(np.sin(2 * np.pi * ts * kwarks.get('frequency', 440)))
    elif choice == 2:
        # Sawtooth
        ret = np.mod(ts, 1 / kwarks.get('frequency', 440))
        return ret / np.max(np.abs(ret)) - 0.5
    elif choice == 3:
        fp = dirname(__file__)
        with wave.Wave_read(join_path(fp, 'test.wav')) as wav:
            audio = wav.readframes(wav.getnframes())
        bit_depth = kwarks['bit_depth']
        if bit_depth == 3:
            # Pad each 3-byte sample to 4 bytes (iterate per sample, not per byte).
            audio = np.frombuffer(b''.join([
                audio[3 * i:3 * (i + 1)] + b'\x00'
                for i in range(len(audio) // 3)
            ]), dtype=np.int32)  # Not tested at all
        elif bit_depth == 2:
            audio = np.frombuffer(audio, dtype=np.int16)
        else:
            raise Exception('Non-supported bit depth')
        audio = audio.astype(float)
        return (audio - (audio.max() - audio.min()) / 2) / np.max(np.abs(audio))
    else:
        # If choice is not 1, 2 or 3 the selected profile is a sine wave.
        return np.sin(2 * np.pi * ts * kwarks.get('frequency', 440))
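# Hypothetical usage of BASE_WAVEFORM: one second of a 440 Hz square wave
# sampled at 44.1 kHz (the values here are illustrative only).
ts = np.arange(44100) / 44100.0
samples = BASE_WAVEFORM(ts, profile=1, frequency=440)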
def run_with_metadata(self, audio) -> Metadata:
    normalized_audio = normalize_audio_input(audio)
    audio_streams = BytesIO(normalized_audio)
    with wave.Wave_read(audio_streams) as wav:
        audio_streams = np.frombuffer(wav.readframes(wav.getnframes()), np.int16)
    results = self.model.sttWithMetadata(audio_buffer=audio_streams)
    return results
def run(self, audio):
    """Perform speech-to-text transcription"""
    audio = normalize_audio(audio)
    audio = BytesIO(audio)
    with wave.Wave_read(audio) as wav:
        audio = np.frombuffer(wav.readframes(wav.getnframes()), np.int16)
    result = self.model.stt(audio_buffer=audio)
    return result
def loadwav(fname):
    with wave.Wave_read(fname) as f:
        assert f.getframerate() == sp.sf, 'sampling rate is different (' + fname + ')'
        assert f.getnchannels() == 1, 'channel is not 1 (' + fname + ')'
        T = numpy.frombuffer(f.readframes(f.getnframes()),
                             numpy.int16).astype(numpy.float32)
    return T
def load_waveform_from_wave_file(self, filename, dtype=np.float64):
    """Opens wave file and reads, assuming signed shorts (mono)."""
    wr = wave.Wave_read(filename)
    fs = wr.getframerate()
    # The unpack count equals nframes, so this assumes a single channel.
    sig = np.array(struct.unpack('%dh' % wr.getnframes(),
                                 wr.readframes(wr.getnframes())),
                   dtype=dtype)
    wr.close()
    return sig, fs
def is_valid_wav(filename):
    # check the sampling rate and number of bits of the WAV
    try:
        wav_file = wave.Wave_read(filename)
    except Exception:
        return False
    if wav_file.getframerate() != 16000 or wav_file.getsampwidth() != 2 \
            or wav_file.getnchannels() != 1 or wav_file.getcomptype() != 'NONE':
        return False
    return True
def run(self, audio) -> str:
    """Receives the audio, normalizes it, and sends it to the model to be
    transcribed. Returns the transcribed audio as a string."""
    normalized_audio = normalize_audio_input(audio)
    audio_streams = BytesIO(normalized_audio)
    with wave.Wave_read(audio_streams) as wav:
        audio_streams = np.frombuffer(wav.readframes(wav.getnframes()), np.int16)
    results = self.model.stt(audio_buffer=audio_streams)
    return results
def testWaveFile(filename):
    w = wave.Wave_read(filename)
    bitrate = w.getframerate()
    channels = w.getnchannels()
    bits = w.getsampwidth() * 8
    if not bitrate == 8000 or not channels == 1 or not bits == 16:
        newFilename = filename[:-4] + "_8000.wav"
        returnValue = os.system(SOXCOMMAND.format(filename, newFilename))
        if returnValue:
            raise SOX_Exception("Something went wrong calling sox: " +
                                SOXCOMMAND.format(filename, newFilename))
        filename = newFilename
    return filename
def read_wave_file(file, data_min_proportion=1.0):
    """
    Reads a wave file and returns it as a NumPy array.

    Args:
      file: Either a filename or a file object

    Returns a 2-tuple of:
      (samprate, data)
    where samprate is the sampling rate as an integer (e.g. 16000), and
    `data` is a numpy array with dtype int16 and shape
    (num_channels, num_samples).

    Raises:
      RuntimeError: if an error occurred while reading the data. (Note: if
        more than `data_min_proportion` of the expected data was read, it
        will succeed even if the file was truncated.)
      wave.Error: whatever errors the wave module encountered
      OSError (via wave module), if a file could not be opened.
    """
    wave_reader = wave.Wave_read(file)
    (nchannels, sampwidth, framerate, nframes,
     comptype, compname) = wave_reader.getparams()
    if comptype != 'NONE':
        raise RuntimeError(
            "Wave file has compression, which is unsupported: comptype={}, "
            "compname={}".format(comptype, compname))
    if sampwidth != 2:
        raise RuntimeError(
            "Wave file has sample width of {}, expected 2.".format(sampwidth))
    data_as_bytes = wave_reader.readframes(nframes)
    nframes_read = len(data_as_bytes) // (sampwidth * nchannels)
    assert nframes_read <= nframes
    if nframes_read < data_min_proportion * nframes:
        raise RuntimeError(
            "Reading data from {0}, read too little data: {1} != {2} "
            "(min allowed proportion: {3})".format(file, nframes_read, nframes,
                                                   data_min_proportion))
    dt = np.dtype('int16')
    if sys.byteorder == 'big':
        # Make sure to interpret the data as little-endian even if the machine
        # is big endian.
        dt = dt.newbyteorder('<')
    array = np.frombuffer(data_as_bytes, dt)
    # order='F' because the frame has a higher stride than the channel.
    return (framerate, array.reshape((nchannels, nframes_read), order='F'))
def build_data(wav, begin=None, end=None):
    wav_in_file = wave.Wave_read(wav)
    wav_in_num_samples = wav_in_file.getnframes()
    N = wav_in_file.getnframes()
    dstr = wav_in_file.readframes(N)
    data = np.frombuffer(dstr, np.int16)
    if begin is not None and end is not None:
        return data[begin * 16000:end * 16000]
    X = []
    l = len(data)
    for i in range(0, l - 100, 160):
        X.append(data[i:i + 480])
    return X
def is_valid_wav(filename):
    # check the sampling rate and number of bits of the WAV
    try:
        wav_file = wave.Wave_read(filename)
        if wav_file.getframerate() != 16000 or wav_file.getsampwidth() != 2 \
                or wav_file.getnchannels() != 1 or wav_file.getcomptype() != 'NONE':
            wav_file.close()
            return False
        wav_file.close()
        return True
    except Exception:
        if 'wav_file' in locals():
            wav_file.close()
        return False
def fixWaveFile(filename):
    w = wave.Wave_read(filename)
    bitrate = w.getframerate()
    channels = w.getnchannels()
    bits = w.getsampwidth() * 8
    if not bitrate == 8000 or not channels == 1 or not bits == 16:
        newFilename = filename[:-4] + "_8000.wav"
        returnValue = os.system(SOXCOMMAND.format(filename, newFilename))
        if returnValue:
            raise SOX_Exception(
                "Something went wrong calling sox: " +
                SOXCOMMAND.format(filename, newFilename) +
                "\nIs sox installed? If not, just make sure that you've "
                "saved 8kHz mono wav files.")
        filename = newFilename
    return filename
def fixWaveFile(filename):
    w = wave.Wave_read(filename)
    bitrate = w.getframerate()
    channels = w.getnchannels()
    bits = w.getsampwidth() * 8
    if not bitrate == 8000 or not channels == 1 or not bits == 16:
        newFilename = filename[:-4] + "_8000.wav"
        returnValue = os.system(SOXCOMMAND.format(filename, newFilename))
        if returnValue:
            raise SOX_Exception(
                "Failed to call the sox program: " +
                SOXCOMMAND.format(filename, newFilename) +
                "\nIs sox installed? Also check that the audio files are "
                "8kHz mono wav.")
        filename = newFilename
    return filename
def convert(self):
    print("convert")
    try:
        pathname = os.path.normpath(self.pathname + '/../')
        language = self.ui.comboBox_language.currentText()
        for i in range(0, 101):
            file = wave.Wave_read(
                os.path.join(pathname, 'Languages', language,
                             '{:04d}.wav'.format(i)))
            file1 = wave.Wave_write(
                os.path.join(pathname, 'Languages', language, 'work',
                             str(i) + '.wav'))
            file1.setparams(file.getparams())
            file1.writeframes(file.readframes(file.getnframes()))
            file1.close()
            file.close()
    except TypeError as e:
        print("wavefile : ", e)
def crop_file(input_wav, output_wav, start_time, end_time):
    wav_in_file = wave.Wave_read(input_wav)
    wav_in_num_samples = wav_in_file.getnframes()
    wav_out_file = wave.Wave_write(output_wav)
    wav_out_file.setparams(
        (wav_in_file.getnchannels(), wav_in_file.getsampwidth(),
         wav_in_file.getframerate(),
         float(end_time) - float(start_time) + 1, 'NONE', 'noncompressed'))
    start_sample = int(float(start_time) * wav_in_file.getframerate())
    end_sample = int(float(end_time) * wav_in_file.getframerate())
    # writing the wav file from the given regions
    for i in range(0, wav_in_num_samples):
        samples = wav_in_file.readframes(1)
        if start_sample <= i <= end_sample:
            wav_out_file.writeframes(samples)
            # samples_unpacked = struct.unpack("<h", samples)
            # print i, int(samples_unpacked[0])
    wav_in_file.close()
    wav_out_file.close()
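# A faster variant (a sketch, not from the original source): seek once with
# setpos() and read the whole region in a single readframes() call instead of
# iterating frame by frame.
def crop_file_fast(input_wav, output_wav, start_time, end_time):
    with wave.open(input_wav, 'rb') as win, wave.open(output_wav, 'wb') as wout:
        wout.setparams(win.getparams())  # writeframes() fixes nframes on close
        start = int(float(start_time) * win.getframerate())
        end = int(float(end_time) * win.getframerate())
        win.setpos(start)
        wout.writeframes(win.readframes(end - start + 1))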
def play_audio(self, audio_data_src: http.client.HTTPResponse or io.BytesIO,
               format: str = 'wav') -> None:
    '''
    Plays audio from audio_data_src with format=format

    Args:
        audio_data_src: binary audio source
        format: audio format
    '''
    try:
        if format == 'wav':
            # define stream chunk
            chunk = 1024
            # open a wav format music
            f = wave.Wave_read(audio_data_src)
            # instantiate PyAudio
            p = pyaudio.PyAudio()
            # open stream
            stream = p.open(format=p.get_format_from_width(f.getsampwidth()),
                            channels=f.getnchannels(),
                            rate=f.getframerate(),
                            output=True)
            # read data
            data = f.readframes(chunk)
            # play stream
            while data:
                stream.write(data)
                data = f.readframes(chunk)
            # stop stream
            stream.stop_stream()
            stream.close()
            # close PyAudio
            p.terminate()
    except Exception as e:
        ErrorLogger(__file__, e)
def read_wav_file(file: Text) -> Tuple[int, np.ndarray]:
    """
    Reads a wave file and returns it as a NumPy array.

    Args:
      file: Filepath to a .wav file.

    Returns:
      (samprate, data) where samprate is the sampling frequency and data is
      a numpy array with dtype int16 and shape (num_channels, num_samples).

    Raises:
      RuntimeError: if an error occurred while reading the data.
      wave.Error: whatever errors the wave module encountered
      OSError (via wave module), if a file could not be opened.
    """
    wave_reader = wave.Wave_read(file)
    (nchannels, sampwidth, framerate, nframes,
     comptype, compname) = wave_reader.getparams()
    if comptype != 'NONE':
        raise RuntimeError(
            "Wave file has compression, which is unsupported: comptype={}, "
            "compname={}".format(comptype, compname))
    # Expect 16-bit magnitude sampling.
    if sampwidth != 2:
        raise RuntimeError(
            "Wave file has sample width of {}, expected 2.".format(sampwidth))
    data_as_bytes = wave_reader.readframes(nframes)
    nframes_read = len(data_as_bytes) // (sampwidth * nchannels)
    assert nframes_read <= nframes
    dt = np.dtype('int16')
    if sys.byteorder == 'big':
        # Make sure to interpret the data as little-endian even if the machine
        # is big endian.
        dt = dt.newbyteorder('<')
    array = np.frombuffer(data_as_bytes, dt)
    # order='F' because the frame has a higher stride than the channel.
    return framerate, array.reshape((nchannels, nframes_read), order='F')
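# Hypothetical usage: load a file and take the first channel; the array comes
# back with shape (num_channels, num_samples), as documented above.
# ('example.wav' is an illustrative file name.)
rate, data = read_wav_file('example.wav')
first_channel = data[0]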
def wf_and_sr_from_filepath(filepath, **kwargs):
    must_ensure_mono = kwargs.pop('ensure_mono', True)
    if is_wav_file(filepath):
        kwargs = dict({'always_2d': False}, **kwargs)
        if 'offset_s' in kwargs.keys() or 'duration' in kwargs.keys():
            sample_rate = wave.Wave_read(filepath).getframerate()
            start = int(round(kwargs.pop('offset_s', 0) * sample_rate))
            kwargs['start'] = start
            duration = kwargs.pop('duration', None)
            if duration is not None:
                kwargs['stop'] = int(start + round(duration * sample_rate))
        kwargs = filter_kwargs_to_func_arguments(sf.read, kwargs)
        wf, sr = sf.read(filepath, **kwargs)
    else:
        kwargs['offset'] = kwargs.pop('offset_s', 0.0)
        wf, sr = librosa.load(filepath, **kwargs)
    if must_ensure_mono:
        wf = ensure_mono(wf)
    return wf, sr
def FileSelected(self, filename):
    """
    Set a sound file.

    @param filename (string) is an audio file name (a wave is expected).
    """
    # we already opened the same file
    if filename == self._filename and self._mediaplayer is not None:
        logging.info(' ... SndPlayer: file %s was already opened. [WARNING]' % (filename))
        return
    try:
        m = wx.media.MediaCtrl(self, style=wx.NO_BORDER)
        m.Load(filename)
        self._length = m.Length()
        if self._length == 0:
            # **** BUG of the MediaPlayer! ****
            import wave
            w = wave.Wave_read(filename)
            self._length = int(1000 * float(w.getnframes()) / float(w.getframerate()))
        logging.info(" ... File %s successfully loaded. [ OK ]" % (filename))
    except Exception as e:
        logging.info(" ... File %s not loaded. [ ERROR ]" % (filename))
        ShowInformation(self, self._prefs,
                        'Error loading: ' + filename + ': ' + str(e),
                        style=wx.ICON_ERROR)
        return False
    # set mediaplayer with the new one
    self._filename = filename
    self._mediaplayer = m
    self.ActivateButtons(True)
    self._offsets = (0, self._length)
    if self._playbackSlider is not None:
        self._playbackSlider.SetRange(0, self._length)
        self._playbackSlider.SetTickFreq(int(self._length / 10), 1)
    self._timer.Start(self._refreshTimer)
    self.Refresh()
phoneme_classifier_SIGMA = "4.3589"
phoneme_classifier_C = "1"
phoneme_classifier_B = "0.8"
phoneme_classifier_epochs = "1"
phoneme_classifier_model = "models/pa_phoeneme_frame_based.C_%s.B_%s.sigma_%s.pad_%s.epochs_%s.model" % \
    (phoneme_classifier_C, phoneme_classifier_B, phoneme_classifier_SIGMA,
     phoneme_classifier_pad, phoneme_classifier_epochs)

# generate intermediate files from a temp filename
(tmp_fd, tmp_filename) = tempfile.mkstemp()
wav_filename = tmp_filename + ".16kHz.wav"
mfc_filename = tmp_filename + ".mfc"
mfcc_tmp_file = tmp_filename + ".mfc_delta"

# read WAV file parameters
wave_file = wave.Wave_read(args.wav_filename)
wave_sampling_rate = wave_file.getframerate()
wave_file.close()

# converts WAV to 16kHz
if wave_sampling_rate != 16000:
    cmd = "%s %s -r 16k %s remix 1" % (sox_bin, args.wav_filename, wav_filename)
    easy_call(cmd)
    rm_wav_file = True
else:
    wav_filename = args.wav_filename
    rm_wav_file = False

# extract MFCC features using HCopy utility
cmd_params = "%s -C %s %s %s" % (hcopy_bin, htk_config, wav_filename, mfc_filename)
easy_call(cmd_params)
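# A shell-free equivalent of the sox resample call above (a sketch using
# subprocess; sox_bin, args.wav_filename and wav_filename as defined earlier):
import subprocess
subprocess.run([sox_bin, args.wav_filename, "-r", "16k", wav_filename,
                "remix", "1"], check=True)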
def unpackMono(waveFile):
    # Assumes 8-bit unsigned samples: "B" reads one unsigned byte per frame.
    w = wave.Wave_read(waveFile)
    data = []
    for i in range(w.getnframes()):
        data.append(unpack("B", w.readframes(1))[0])
    return data
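# Equivalent bulk decode with NumPy (a sketch; assumes 8-bit unsigned mono,
# which is what the "B" struct format above implies):
import numpy as np

def unpack_mono_np(wave_file):
    with wave.open(wave_file, 'rb') as w:
        return np.frombuffer(w.readframes(w.getnframes()), dtype=np.uint8).tolist()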
playsound('indian.wav')  # wav file says 'sorry!'

# Let's try sorry.html
url = f"{standard_url}sorry.html"
response = requests.get(url, auth=HTTPBasicAuth('butter', 'fly'))
page_contents = BeautifulSoup(response.text, 'html.parser')
print(page_contents)
# * - "what are you apologizing for?"

# Let's play with the wave module
import wave

indian = wave.Wave_read('indian.wav')
print(f'''Number of channels: {indian.getnchannels()}
Sample width: {indian.getsampwidth()}
Frame rate: {indian.getframerate()}
Number of frames: {indian.getnframes()}''')
# use .getparams() to get all the parameters at once

new = wave.Wave_write('new.wav')
new.setframerate(11025 * 2)  # increase frame rate
new.setsampwidth(1)  # decrease sample width
new.setnframes(55788)
new.setnchannels(1)
print(f'''Number of channels: {new.getnchannels()}
Sample width: {new.getsampwidth()}''')
def run(self, musicbrainzid, fname):
    baseFname, ext = os.path.splitext(os.path.basename(fname))
    wavfname, created = util.docserver_get_wav_filename(musicbrainzid)
    panelWidth = 900  # pixels
    panelHeight = 255  # pixels
    zoomlevels = self._zoom_levels  # seconds
    options = coll.namedtuple(
        'options',
        'image_height fft_size image_width f_min f_max scale_exp pallete')
    options.image_height = panelHeight
    options.fft_size = self._fft_size
    options.f_min = self._f_min
    options.f_max = self._f_max
    options.pallete = self._pallete
    options.scale_exp = self._scale_exp
    ret = {}
    for zoom in zoomlevels:
        # At the beginning of each zoom level we reset the image_width
        # since we are modifying it at the end of the last zoom level
        options.image_width = panelWidth
        wvFile = wave.Wave_read(wavfname)
        framerate = wvFile.getframerate()
        totalframes = wvFile.getnframes()
        # We want this many frames per file at this zoom level.
        framesperimage = framerate * zoom
        wfname = "waveform%s" % zoom
        specname = "spectrum%s" % zoom
        inv_mfcc_name = "inv_mfcc_spectrum%s" % zoom
        wfdata = []
        specdata = []
        inv_mfcc_data = []
        sumframes = 0
        while sumframes < totalframes:
            if sumframes + framesperimage > totalframes:
                remaining_frames = (totalframes - sumframes)
                options.image_width = options.image_width * remaining_frames / framesperimage
            else:
                remaining_frames = framesperimage
            fp, smallname = tempfile.mkstemp(".wav")
            os.close(fp)
            data = wvFile.readframes(remaining_frames)
            wavout = wave.open(smallname, "wb")
            # This will set nframes, but writeframes resets it
            wavout.setparams(wvFile.getparams())
            wavout.writeframes(data)
            wavout.close()
            sumframes += framesperimage
            specio = io.BytesIO()
            # Set the name attr so that PIL gets the filetype hint
            specio.name = "spec.png"
            wavio = io.BytesIO()
            wavio.name = "wav.png"
            in_mfcc_io = io.BytesIO()
            in_mfcc_io.name = "melspec.png"
            w2png.genimages(smallname, wavio, specio, in_mfcc_io, options)
            os.unlink(smallname)
            specdata.append(specio.getvalue())
            wfdata.append(wavio.getvalue())
            inv_mfcc_data.append(in_mfcc_io.getvalue())
        ret[wfname] = wfdata
        ret[specname] = specdata
        ret[inv_mfcc_name] = inv_mfcc_data
    ret["smallfull"] = self.make_mini(wavfname)
    if created:
        os.unlink(wavfname)
    return ret
def run(self):
    """
    Creates an input audio stream, initializes the wake word detection
    (Porcupine) and speech-to-intent (Rhino) engines, and monitors the audio
    stream for occurrences of the wake word, then infers the intent from the
    speech command that follows.
    """

    def _frame_index_to_sec(frame_index):
        return (float(frame_index * rhino.frame_length) /
                float(rhino.sample_rate)) - float(1)

    porcupine = None
    rhino = None
    pa = None
    audio_stream = None
    wake_phrase_detected = True
    intent_extraction_is_finalized = False
    Apath = Video_to_Audio(self._video_path)
    wf = wave.Wave_read(Apath)
    ww, sr = soundfile.read(Video_to_Audio(self._video_path))
    print(len(ww))
    try:
        porcupine = Porcupine(
            library_path=self._porcupine_library_path,
            model_file_path=self._porcupine_model_file_path,
            keyword_file_paths=[self._porcupine_keyword_file_path],
            sensitivities=[0.5],
        )
        rhino = Rhino(
            library_path=self._rhino_library_path,
            model_path=self._rhino_model_file_path,
            context_path=self._rhino_context_file_path,
            sensitivity=0.6,
        )
        print()
        print("****************************** context ******************************")
        print(rhino.context_info)
        print("*********************************************************************")
        print()
        pa = pyaudio.PyAudio()
        audio_stream = pa.open(
            rate=porcupine.sample_rate,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=porcupine.frame_length,
            input_device_index=self._input_device_index,
        )
        test = 0
        Tpath = Apath.replace("wav", "txt")
        f = open(Tpath, "w")
        ouput = ""
        classstr = ""
        startcount = 0
        endcount = 0
        cango = 1
        checkfirst = 0
        data_csv = [["Class_num", "Start_time", "End_time"]]
        ClassNum = None
        Start_time = None
        Start_time2 = None
        ClassNum2 = None
        rm = None
        # NOTE: This is true now and will be correct possibly forever.
        # If it changes, the logic below needs to change.
        assert porcupine.frame_length == rhino.frame_length
        try:
            while True:
                date = wf.readframes(porcupine.frame_length)
                pcm = audio_stream.read(porcupine.frame_length,
                                        exception_on_overflow=False)
                pcm = struct.unpack_from("h" * porcupine.frame_length, date)
                if self._output_path is not None:
                    self._recorded_frames.append(pcm)
                if not wake_phrase_detected:
                    wake_phrase_detected = porcupine.process(pcm)
                    if wake_phrase_detected:
                        print("detected wake phrase")
                elif not intent_extraction_is_finalized:
                    intent_extraction_is_finalized = rhino.process(pcm)
                else:
                    if rhino.is_understood():
                        cango = 1
                        intent, slot_values = rhino.get_intent()
                        print()
                        if intent == "EndWork":
                            endcount += 1
                            classstr = " - %s" % _frame_index_to_sec(test)
                        else:
                            checkfirst += 1
                            startcount += 1
                            endcount = 0
                            for slot, value in slot_values.items():
                                print("%s: %s" % (slot, value))
                                classstr = ("%s: %s" % (slot, value)) + (
                                    " start time is %s" % _frame_index_to_sec(test))
                                if startcount == 2:
                                    Start_time2 = Start_time
                                    ClassNum2 = ClassNum
                                Start_time = _frame_index_to_sec(test)
                                ClassNum = value
                        print()
                        print("intent : %s at time: %f" %
                              (intent, _frame_index_to_sec(test)))
                        print()
                    else:
                        print("didn't understand the command")
                        cango = 0
                    rhino.reset()
                    wake_phrase_detected = True
                    intent_extraction_is_finalized = False
                    print(startcount, endcount)
                    print(ouput)
                    if cango:
                        if endcount == 1 and startcount == 0:
                            ouput = classstr
                            f.write("-1 class end at" + ouput + "\n")
                            endcount = 0
                            ouput = ""
                            data_csv.append(["-1", "-1", _frame_index_to_sec(test)])
                        elif ouput == "" and endcount == 0 and startcount == 1:
                            ouput = classstr
                        elif ouput != "" and endcount == 1:
                            try:
                                data_csv.remove(rm)
                            except ValueError:
                                pass
                            data_csv.append([ClassNum, Start_time,
                                             _frame_index_to_sec(test)])
                            ouput += classstr
                            endcount = 0
                            startcount = 0
                            f.write(ouput + "\n")
                            ouput = ""
                        elif endcount == 0 and startcount == 2:
                            if checkfirst == 2:
                                data_csv.append([ClassNum2, Start_time2, "-1"])
                                f.write(ouput + "\n")
                            data_csv.append([ClassNum, Start_time, "-1"])
                            rm = [ClassNum, Start_time, "-1"]
                            ouput = classstr
                            f.write(ouput + "\n")
                            startcount = 1
                test += 1
        except:
            print("EOF")
            print(_frame_index_to_sec(test))
            data_csv.append(["Maybe miss", classstr, classstr])
            f.write("There may be a missed label: %s" % classstr)
        with open("sw_data_new.csv", "w") as f:
            writer = csv.writer(f)
            for row in data_csv:
                writer.writerow(row)
    except KeyboardInterrupt:
        print("stopping ...")
    finally:
        if porcupine is not None:
            porcupine.delete()
        if rhino is not None:
            rhino.delete()
        if audio_stream is not None:
            audio_stream.close()
        if pa is not None:
            pa.terminate()
        if self._output_path is not None and len(self._recorded_frames) > 0:
            recorded_audio = np.concatenate(self._recorded_frames,
                                            axis=0).astype(np.int16)
            soundfile.write(
                os.path.expanduser(self._output_path),
                recorded_audio,
                samplerate=porcupine.sample_rate,
                subtype="PCM_16",
            )