def to_wav(self, frames):
    """Resample captured 16-bit mono PCM to 16 kHz.

    :param frames: raw PCM bytes, or a list of byte chunks that is joined
        first. The data is treated as 2-byte, single-channel samples
        recorded at ``self.RATE`` Hz.
    :raises audioop.error: for any resampling error other than a
        misaligned buffer.

    If the buffer does not hold a whole number of frames, one byte is
    dropped from whichever end leaves the quieter signal and the
    conversion is retried once.

    NOTE(review): as visible here the resampled data is only bound to the
    local name ``frames`` and nothing is returned -- confirm whether the
    rest of the original method was lost.
    """
    if isinstance(frames, list):
        frames = b''.join(frames)
    sample_size = 2
    # Compare by value: ``is not`` on an int tests object identity.
    if self.RATE != 16000:
        # Every STT engine needs 16kHz
        try:
            frames, _ = audioop.ratecv(
                frames, sample_size, 1, self.RATE, 16000, None)
        except audioop.error as e:
            if str(e) != "not a whole number of frames":
                raise
            # This means that either the first or the last byte is rubbish.
            # If we delete the wrong byte we will get crap, i.e. loud
            # noise, so keep whichever alignment has the lower RMS.
            if audioop.rms(frames[0:-1], 2) > audioop.rms(frames[1:], 2):
                frames = frames[1:]
            else:
                frames = frames[0:-1]
            print("Try again")
            # Dump of the re-aligned buffer, kept from the original code.
            with open('test.raw', 'wb') as fp:
                fp.write(frames)
            frames, _ = audioop.ratecv(
                frames, sample_size, 1, self.RATE, 16000, None)
def test_ratecv(self):
    """Check ``audioop.ratecv`` on empty input, identity and chunked use."""
    widths = (1, 2, 3, 4)
    for width in widths:
        # Empty input of every bytes-like flavour yields the initial state.
        for blank in (b'', bytearray(), memoryview(b'')):
            self.assertEqual(
                audioop.ratecv(blank, width, 1, 8000, 8000, None),
                (b'', (-1, ((0, 0),))))
        self.assertEqual(
            audioop.ratecv(b'', width, 5, 8000, 8000, None),
            (b'', (-1, ((0, 0),) * 5)))
        self.assertEqual(
            audioop.ratecv(b'', width, 1, 8000, 16000, None),
            (b'', (-2, ((0, 0),))))
        # Equal input and output rates leave the data untouched.
        self.assertEqual(
            audioop.ratecv(datas[width], width, 1, 8000, 8000, None)[0],
            datas[width])

    # Two consecutive calls sharing state.
    first, carry = audioop.ratecv(b'\x00\x01\x02', 1, 1, 8000, 16000, None)
    second, carry = audioop.ratecv(b'\x00\x01\x02', 1, 1, 8000, 16000, carry)
    self.assertEqual(first + second,
                     b'\000\000\001\001\002\001\000\000\001\001\002')

    # Frame-by-frame conversion must match the one-shot conversion.
    for width in widths:
        whole, whole_state = audioop.ratecv(
            datas[width], width, 1, 8000, 16000, None)
        pieces = b''
        carry = None
        for start in range(0, len(datas[width]), width):
            piece, carry = audioop.ratecv(
                datas[width][start:start + width], width, 1,
                8000, 16000, carry)
            pieces += piece
        self.assertEqual(pieces, whole)
        self.assertEqual(carry, whole_state)
def messageHandler(conn):
    """Receive one packet from ``sock`` and push its audio into the FIFO.

    The first 12 bytes are unpacked as ``BBHII`` (version, payload type,
    counter, timestamp, source id); the remainder is the audio payload.
    Payload types 0 and 1 are decoded from u-law / A-law and resampled
    from 8000 to 44100 Hz; type 11 is used as int16 samples directly.
    Gaps in the packet counter are filled with scaled random noise.

    :param conn: not used by the code shown; data is read from the
        module-level ``sock``.
    :raises RuntimeError: on an unsupported version byte or payload type.
    """
    # this is to deal with a single incoming TCP or UDP message
    global fifo, previous, missed, state

    # the actual packet length will not be 2048 bytes, but depends on the
    # format and number of audio samples
    buf = sock.recv(2048)
    if len(buf) < 12:
        return

    # see https://en.wikipedia.org/wiki/RTP_payload_formats
    header = struct.unpack('BBHII', buf[0:12])
    version, payload_type, counter, timestamp, source_id = header
    if version != 128:
        raise RuntimeError('unsupported packet version')
    fragment = bytearray(buf[12:])

    with fifolock:
        if source_id not in fifo:
            # make a buffer that can hold one second of audio
            fifo[source_id] = RingBuffer(source_id, rate)
            previous[source_id] = None
            state[source_id] = None
            missed[source_id] = 0

        if payload_type == 0 or payload_type == 1:
            # 0: PCMU (u-law), 8 kHz mono.
            # 1: decoded as A-law.
            # NOTE(review): RFC 3551 assigns PCMA payload type 8, not 1 --
            # confirm against the sender.
            if payload_type == 0:
                fragment = audioop.ulaw2lin(fragment, 2)
            else:
                fragment = audioop.alaw2lin(fragment, 2)
            fragment, state[source_id] = audioop.ratecv(
                fragment, 2, 1, 8000, 44100, state[source_id])
            dat = np.frombuffer(fragment, np.int16)
        elif payload_type == 11:
            # 11: L16 linear PCM, 44100 Hz mono
            dat = np.frombuffer(fragment, np.int16)
        else:
            raise RuntimeError('unsupported RTP packet type')

        last_counter = previous[source_id]
        if last_counter is not None:
            # One (negative) step per packet skipped since the last one.
            for missing in range(last_counter + 1 - counter, 0):
                logger.debug('missed packet from %d' % (source_id))
                # See https://en.wikipedia.org/wiki/Comfort_noise
                # FIXME these are only positive
                missing_dat = np.random.random(len(dat))
                missing_dat *= np.linalg.norm(dat) / np.linalg.norm(
                    missing_dat)
                missing_timestamp = timestamp + missing * len(
                    dat) * 1000 / 44100
                missed[source_id] += 1
                fifo[source_id].push(missing_dat.astype(np.int16),
                                     missing_timestamp)

        previous[source_id] = counter
        fifo[source_id].push(dat, timestamp)
def testratecv(data):
    """Check two chained ``audioop.ratecv`` calls against a known result.

    :param data: sequence whose first item is the 1-byte-wide fragment to
        upsample from 8000 to 16000 Hz.
    :returns: 1 if the concatenated output matches the expected bytes,
        otherwise 0.
    """
    if verbose:
        print('ratecv')
    state = None
    d1, state = audioop.ratecv(data[0], 1, 1, 8000, 16000, state)
    d2, state = audioop.ratecv(data[0], 1, 1, 8000, 16000, state)
    # ratecv returns bytes, so the expectation must be a bytes literal;
    # a str literal never compares equal on Python 3.
    expected = b'\000\000\001\001\002\001\000\000\001\001\002'
    return 1 if d1 + d2 == expected else 0
def get_raw_data(self, convert_rate = None, convert_width = None):
    """
    Returns a byte string representing the raw frame data for the audio represented by the ``AudioData`` instance.

    If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match.

    If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match.

    Writing these bytes directly to a file results in a valid `RAW/PCM audio file <https://en.wikipedia.org/wiki/Raw_audio_format>`__.
    """
    assert convert_rate is None or convert_rate > 0, "Sample rate to convert to must be a positive integer"
    assert convert_width is None or (convert_width % 1 == 0 and 1 <= convert_width <= 4), "Sample width to convert to must be between 1 and 4 inclusive"

    raw_data = self.frame_data

    # make sure unsigned 8-bit audio (which uses unsigned samples) is handled like higher sample width audio (which uses signed samples)
    if self.sample_width == 1:
        raw_data = audioop.bias(raw_data, 1, -128)  # subtract 128 from every sample to make them act like signed samples

    # resample audio at the desired rate if specified
    if convert_rate is not None and self.sample_rate != convert_rate:
        raw_data, _ = audioop.ratecv(raw_data, self.sample_width, 1, self.sample_rate, convert_rate, None)

    # convert samples to desired byte format if specified
    output_width = self.sample_width
    if convert_width is not None and self.sample_width != convert_width:
        raw_data = audioop.lin2lin(raw_data, self.sample_width, convert_width)
        output_width = convert_width

    # Undo the signed shift whenever the data leaves as 8-bit, including
    # the case where an 8-bit source had no width conversion requested.
    if output_width == 1:
        raw_data = audioop.bias(raw_data, 1, 128)  # add 128 to every sample to make them act like unsigned samples again
    return raw_data
def preprocess(oldFileName):
    """Load a WAV file and return its u-law codes as a NumPy array.

    :param oldFileName: path of the ``.wav`` file to read.
    :returns: array of u-law byte values, each floor-divided by
        ``Q_FACTOR``. The audio is resampled to ``SAMPLE_RATE`` first
        when the file's frame rate differs.
    """
    # The original opened an undefined name (`filename`) here.
    wav_file = wave.open(oldFileName, "rb")
    try:
        num_channels = wav_file.getnchannels()
        sample_rate = wav_file.getframerate()
        sample_width = wav_file.getsampwidth()
        # Grab the bytes from our WAV file
        raw_frames = wav_file.readframes(wav_file.getnframes())
    finally:
        wav_file.close()

    if sample_rate != SAMPLE_RATE:
        raw_frames, _ = audioop.ratecv(raw_frames, sample_width,
                                       num_channels, sample_rate,
                                       SAMPLE_RATE, None)
    u_law = audioop.lin2ulaw(raw_frames, sample_width)

    # bytearray yields ints on both Python 2 and 3, so no ord() is needed.
    return np.asarray([code // Q_FACTOR for code in bytearray(u_law)])
def open_input(filename):
    """Read a WAV file, print its properties and return u-law codes.

    :param filename: path of the WAV file to read.
    :returns: ``(codes, input_sample_rate)`` where ``codes`` is a NumPy
        array of u-law byte values floor-divided by ``Q_FACTOR`` and
        ``input_sample_rate`` is the file's original frame rate. The audio
        is resampled to ``SAMPLE_RATE`` before encoding when the rates
        differ.
    """
    stream = wave.open(filename, "rb")
    try:
        input_num_channels = stream.getnchannels()
        input_sample_rate = stream.getframerate()
        input_sample_width = stream.getsampwidth()
        input_num_frames = stream.getnframes()
        raw_data = stream.readframes(input_num_frames)  # Returns byte data
    finally:
        stream.close()

    total_samples = input_num_frames * input_num_channels

    print("Sample Width: {} ({}-bit)".format(input_sample_width,
                                             8 * input_sample_width))
    print("Number of Channels: " + str(input_num_channels))
    print("Sample Rate " + str(input_sample_rate))
    print("Number of Samples: " + str(total_samples))
    print("Duration: {0:.2f}s".format(
        total_samples / float(input_sample_rate)))
    print("Raw Data Size: " + str(len(raw_data)))

    if input_sample_rate != SAMPLE_RATE:
        raw_data, _ = audioop.ratecv(raw_data, input_sample_width,
                                     input_num_channels, input_sample_rate,
                                     SAMPLE_RATE, None)
    u_law = audioop.lin2ulaw(raw_data, input_sample_width)

    # bytearray yields ints on both Python 2 and 3, so no ord() is needed.
    codes = [code // Q_FACTOR for code in bytearray(u_law)]
    return np.asarray(codes), input_sample_rate
def label_dir(dir_name):
    """Transcribe every WAV file in a directory and store the result.

    For each file that opens as a WAV, the frames are resampled from
    48000 to 16000 Hz (as 16-bit mono), passed to ``transcribe`` and saved
    as a ``Sound`` entity. The label is the file name up to the first
    underscore. Files that cannot be opened are reported and skipped.

    :param dir_name: directory containing the audio files.
    """
    for file_name in os.listdir(dir_name):
        key = os.path.splitext(file_name)[0]
        # NOTE(review): `sound_key` is not assigned in this function; it
        # probably should be derived from `key` -- confirm against the
        # datastore client in use.
        sound = ds.get(sound_key)
        if sound:
            print("Already classified:", key)
            continue

        try:
            wave_file = wave.open(os.path.join(dir_name, file_name), 'r')
        except (wave.Error, EOFError, IOError):
            print("Error:", file_name)
            continue
        try:
            wave_data = wave_file.readframes(wave_file.getnframes())
        finally:
            # The original never closed the file handle.
            wave_file.close()

        content, _ = audioop.ratecv(wave_data, 2, 1, 48000, 16000, None)
        result = transcribe(content)
        print(result)

        label = file_name.split("_", 1)[0]
        sound = Sound()
        sound.identifier = key
        sound.label_translit = label
        if result and result.alternatives and len(result.alternatives) > 0:
            best = result.alternatives[0]
            sound.google_transcript = best.transcript
            sound.google_confidence = best.confidence
        sound.label_ge = get_label_ge(label)
        sound.put()
        print("Saved:", sound)
def coerce_lin(source_aiff, template_obj):
    '''Read data from source, and convert it to match template's params.

    :param source_aiff: object providing ``read_lin()`` and a ``stream``
        with ``getsampwidth``/``getnchannels``/``getframerate``.
    :param template_obj: object whose ``stream`` supplies the target
        sample width, channel count and frame rate.
    :returns: the converted fragment. Only mono<->stereo channel changes
        are supported; any other mismatch is reported and the channel
        layout is left as it is.
    '''
    import audioop
    frag = source_aiff.read_lin()
    Ss = source_aiff.stream
    St = template_obj.stream

    # Sample width
    if Ss.getsampwidth() != St.getsampwidth():
        print('coerce sampwidth %i -> %i'
              % (Ss.getsampwidth(), St.getsampwidth()))
        frag = audioop.lin2lin(frag, Ss.getsampwidth(), St.getsampwidth())
    width = St.getsampwidth()

    # Channels
    if Ss.getnchannels() != St.getnchannels():
        print('coerce nchannels %i -> %i'
              % (Ss.getnchannels(), St.getnchannels()))
        if Ss.getnchannels() == 2 and St.getnchannels() == 1:
            frag = audioop.tomono(frag, width, 0.5, 0.5)
        elif Ss.getnchannels() == 1 and St.getnchannels() == 2:
            frag = audioop.tostereo(frag, width, 1.0, 1.0)
        else:
            print("Err: can't match channels")

    # Frame rate
    if Ss.getframerate() != St.getframerate():
        print('coerce framerate %i -> %i'
              % (Ss.getframerate(), St.getframerate()))
        frag, state = audioop.ratecv(
            frag, width,
            St.getnchannels(),
            Ss.getframerate(),  # in rate
            St.getframerate(),  # out rate
            None, 2, 1          # no prior state; filter weights A=2, B=1
        )
    return frag
def run(self):
    """Decode ``self.filename`` and feed it to the mumble output buffer.

    Each decoded buffer is down-mixed to mono and resampled to 48000 Hz
    when needed, scaled by ``self.volume`` if that is truthy, and queued
    with ``add_sound``. The loop idles while more than 2.0 units are
    already buffered and returns early once ``self._run`` is cleared.
    """
    resample_state = None

    # Open audio file with Audioread module. This may crash if proper
    # decoders are not installed!
    with audioread.audio_open(self.filename) as decoder:
        self.seconds_duration = decoder.duration
        bps = 2 * decoder.channels * decoder.samplerate
        self.ready = True

        for chunk in decoder:
            # Wait if there is no need to fill the buffer
            while True:
                backlog = self.mumble.sound_output.get_buffer_size()
                if not (backlog > 2.0 and self._run):
                    break
                time.sleep(0.01)
            if not self._run:
                return

            # Update position
            self.bytes_position += len(chunk)
            self.seconds_position = self.bytes_position / bps

            # Convert audio if necessary. We want precisely 16bit 48000Hz
            # mono audio for mumble.
            if decoder.channels != 1:
                chunk = audioop.tomono(chunk, 2, 0.5, 0.5)
            if decoder.samplerate != 48000:
                converted = audioop.ratecv(chunk, 2, 1, decoder.samplerate,
                                           48000, resample_state)
                chunk, resample_state = converted
            if self.volume:
                chunk = audioop.mul(chunk, 2, self.volume)

            # Insert to mumble outgoing buffer
            self.mumble.sound_output.add_sound(chunk)
def to44KStereo(self, buffer):
    """Convert a 16-bit mono 8000 Hz buffer to stereo at 44100 Hz.

    :param buffer: object whose ``data`` attribute holds the PCM bytes.
    :returns: the converted bytes, or ``b''`` when ``audioop`` rejects the
        input. The resampler state is kept in ``self.fromstate`` between
        calls.
    """
    try:
        stereo = audioop.tostereo(buffer.data, 2, 1, 1)
        stereo, self.fromstate = audioop.ratecv(
            stereo, 2, 2, 8000, 44100, self.fromstate)
    except audioop.error:
        # Return bytes on failure too, so callers always get one type.
        return b''
    return stereo
def raw_read(self):
    """Return the next raw audio buffer, converted to the requested format.

    Reads from ``self.source`` and applies, in order, channel, sampling
    rate and sample width conversion for each ``set_*`` option that is
    truthy and differs from the source. Returns ``None`` and sets
    ``self.eof`` once the source is exhausted.
    """
    src = self.source
    buf = src.raw_read()
    if buf is None:
        self.eof = True
        return None

    # Convert channels as needed
    wanted_channels = self.set_channels
    if wanted_channels and src.channels() != wanted_channels:
        if wanted_channels == 1:
            buf = audioop.tomono(buf, src.raw_width(), .5, .5)
        else:
            buf = audioop.tostereo(buf, src.raw_width(), 1, 1)

    # Convert sampling rate as needed
    wanted_rate = self.set_sampling_rate
    if wanted_rate and src.sampling_rate() != wanted_rate:
        converted = audioop.ratecv(buf, src.raw_width(), self.channels(),
                                   src.sampling_rate(), wanted_rate,
                                   self.ratecv_state)
        buf, self.ratecv_state = converted

    # Convert sample width as needed; unsigned 8-bit data is shifted to
    # signed before the conversion and back to unsigned after it.
    wanted_width = self.set_raw_width
    if wanted_width and src.raw_width() != wanted_width:
        if src.raw_width() == 1 and src.has_unsigned_singles():
            buf = audioop.bias(buf, 1, -128)
        buf = audioop.lin2lin(buf, src.raw_width(), wanted_width)
        if wanted_width == 1 and src.has_unsigned_singles():
            buf = audioop.bias(buf, 1, 128)

    return buf
def downsampleWav(src, dst, inrate=44100, outrate=22050, inchannels=1, outchannels=1):
    """Resample a 16-bit WAV file and write the result to ``dst``.

    :param src: path of the source WAV file.
    :param dst: path of the WAV file to create.
    :param inrate: frame rate assumed for the source data.
    :param outrate: frame rate of the output.
    :param inchannels: channel count of the source data.
    :param outchannels: channel count written to the output header. When
        it is 1 and the input is not mono, only the left channel is kept.
    :returns: ``True`` on success, ``False`` if the source is missing or
        conversion, writing or closing fails (a message is printed).
    """
    if not os.path.exists(src):
        print('Source not found!')
        return False

    s_read = wave.open(src, 'r')
    try:
        data = s_read.readframes(s_read.getnframes())
    finally:
        s_read.close()

    try:
        converted, _ = audioop.ratecv(data, 2, inchannels, inrate, outrate,
                                      None)
        # Only down-mix real multi-channel input: running tomono over mono
        # data would treat sample pairs as left/right and halve the audio.
        if outchannels == 1 and inchannels != 1:
            converted = audioop.tomono(converted, 2, 1, 0)
    except Exception:
        print('Failed to downsample wav')
        return False

    # Opened only after a successful conversion, so a failed conversion
    # no longer leaves an empty destination file behind.
    s_write = wave.open(dst, 'w')
    written = True
    try:
        s_write.setparams((outchannels, 2, outrate, 0, 'NONE',
                           'Uncompressed'))
        s_write.writeframes(converted)
    except Exception:
        print('Failed to write wav')
        written = False

    try:
        s_write.close()
    except Exception:
        print('Failed to close wav files')
        return False

    return written
def downsample(buf, outrate=16000):
    """Downsample audio. Required for voice detection.

    The input is reduced to mono (stereo channels are summed), converted
    to 16-bit samples and resampled to ``outrate``.

    :param buf: Audio data buffer (or path to WAV file).
    :param int outrate: Output audio sample rate in Hz.
    :returns: Output buffer holding a 16-bit mono WAV file, rewound to
        the start.
    :rtype: BytesIO
    """
    wav = wave.open(buf)
    try:
        inpars = wav.getparams()
        frames = wav.readframes(inpars.nframes)
    finally:
        wav.close()

    width = inpars.sampwidth

    # 8-bit WAV samples are unsigned while audioop works on signed data.
    if width == 1:
        frames = audioop.bias(frames, 1, -128)

    # Convert to mono
    if inpars.nchannels == 2:
        frames = audioop.tomono(frames, width, 1, 1)

    # Convert to 16-bit depth. This now also widens 8-bit input, which
    # used to be passed to ratecv as if it were already 16-bit.
    if width != 2:
        frames = audioop.lin2lin(frames, width, 2)

    # Convert frame rate to the requested output rate
    frames, _ = audioop.ratecv(frames, 2, 1, inpars.framerate, outrate, None)

    # Return a BytesIO version of the output
    outbuf = BytesIO()
    out = wave.open(outbuf, "w")
    out.setnchannels(1)
    out.setsampwidth(2)
    out.setframerate(outrate)
    out.writeframes(frames)
    out.close()
    outbuf.seek(0)
    return outbuf
def loudness_test(self):
    """Record from the input device and return the audio at 16000 Hz.

    Opens a 48000 Hz, mono, ``paInt16`` input stream driven by
    ``self.callback``, polls until ``self.recording`` becomes false, then
    joins ``self.frames`` and resamples them.

    :returns: the resampled PCM bytes.
    """
    self.slid_win = deque(maxlen=6)
    self.frames = []
    self.recording = True

    stream = self.p.open(
        format=pyaudio.paInt16,
        rate=48000,
        channels=1,  # change this to what your sound card supports
        input_device_index=2,  # change this your input sound card index
        input=True,
        output=False,
        frames_per_buffer=1024 * 8,
        stream_callback=self.callback)

    print("* Recording audio...")
    stream.start_stream()
    while self.recording:
        time.sleep(0.1)
    print("* done\n")
    stream.stop_stream()
    stream.close()

    # The stream captures paInt16, i.e. 2 bytes per sample; the original
    # passed a width of 1, which resamples individual bytes.
    sample_width = 2
    result = audioop.ratecv(b''.join(self.frames), sample_width, 1,
                            48000, 16000, None)
    return result[0]
def get_raw_data(self, convert_rate=None, convert_width=None):
    """
    Return the instance's raw frame data as a byte string.

    When ``convert_rate`` is given and differs from the audio's sample rate, the data is resampled to ``convert_rate`` Hz.

    When ``convert_width`` is given and differs from the audio's sample width, the samples are converted to ``convert_width`` bytes each.

    The returned bytes form a valid `RAW/PCM audio file <https://en.wikipedia.org/wiki/Raw_audio_format>`__ when written to disk as-is.
    """
    assert convert_rate is None or convert_rate > 0, "Sample rate to convert to must be a positive integer"
    assert convert_width is None or (
        convert_width % 1 == 0 and 2 <= convert_width <= 4
    ), "Sample width to convert to must be 2, 3, or 4"

    source_width = self.sample_width
    pcm = self.frame_data

    # Resample first, still in the source sample width.
    must_resample = convert_rate is not None and self.sample_rate != convert_rate
    if must_resample:
        pcm = audioop.ratecv(pcm, source_width, 1, self.sample_rate,
                             convert_rate, None)[0]

    # Then change the sample width if one was requested.
    must_rewidth = convert_width is not None and source_width != convert_width
    if must_rewidth:
        pcm = audioop.lin2lin(pcm, source_width, convert_width)

    return pcm
def insert_feedback(input):
    """Mix a feedback clip into an audio instance at a random offset.

    :param input: pair ``(instance, feedback)``, each itself a pair of
        ``(samples, sample_rate)``. The feedback samples must expose
        ``itemsize`` and the buffer protocol.
    :returns: ``[mixed_samples, instance_rate]`` where each mixed sample
        is ``int(feedback / 2 + audio / 2)``.
    """
    instance, feedback = input[0], input[1]
    audio, ref_rate = instance[0], instance[1]
    fb_samples, fb_rate = feedback[0], feedback[1]

    # Bring the feedback to the instance's sample rate (mono, no state).
    resampled, _ = audioop.ratecv(fb_samples, fb_samples.itemsize, 1,
                                  fb_rate, ref_rate, None)
    fb_values = list(np.frombuffer(resampled, dtype=np.int16))

    # Lead in with up to one second of silence, then pad out to the audio
    # length (a negative pad simply adds nothing).
    lead = random.randint(0, ref_rate*1)
    tail = len(audio) - (len(fb_values) + lead)
    padded = [0]*lead + fb_values + [0]*tail

    # The padded feedback is never shorter than the audio, so zip stops
    # exactly at the audio length.
    mixed = [int(fb / 2 + src / 2) for fb, src in zip(padded, audio)]
    return [mixed, ref_rate]
def speech_to_text(self, audio_frames):
    """Resample ``audio_frames`` and run them through the decoder.

    The audio is converted from ``self.SAMPLE_RATE`` to
    ``self.TARGET_RATE`` and fed to ``self.stt`` inside one utterance.

    :returns: whatever ``self.stt.hypothesis()`` returns.
    """
    with self.stt.start_utterance():
        converted = audioop.ratecv(
            audio_frames, self.WIDTH, self.CHANNELS,
            self.SAMPLE_RATE, self.TARGET_RATE, None)
        pcm = converted[0]
        self.stt.process_raw(pcm, False, False)
    return self.stt.hypothesis()
def audio_data_converter(raw_data: bytes, in_settings: AudioSettings,
                         out_settings: AudioSettings) -> bytes:
    """Convert raw PCM between sample rates and sample widths.

    :param raw_data: PCM bytes described by ``in_settings``.
    :param in_settings: source ``sample_rate``, ``sample_width`` and
        ``channels``.
    :param out_settings: target ``sample_rate`` and ``sample_width``;
        ``channels`` must equal the source's.
    :returns: the converted bytes, or ``raw_data`` itself when rate and
        width already match.
    :raises RuntimeError: if the channel counts differ.
    """
    in_sample_rate = in_settings.sample_rate
    out_sample_rate = out_settings.sample_rate
    in_sample_width = in_settings.sample_width
    out_sample_width = out_settings.sample_width

    if in_settings.channels != out_settings.channels:
        raise RuntimeError(
            'Can not convert audio data. The number of channels must be the same.'
        )

    if in_sample_rate == out_sample_rate and in_sample_width == out_sample_width:
        return raw_data

    # make sure unsigned 8-bit audio (which uses unsigned samples)
    # is handled like higher sample width audio (which uses signed samples)
    if in_sample_width == 1:
        # subtract 128 from every sample to make them act like signed samples
        raw_data = audioop.bias(raw_data, 1, -128)

    if in_sample_rate != out_sample_rate:
        raw_data, _ = audioop.ratecv(raw_data, in_sample_width,
                                     in_settings.channels, in_sample_rate,
                                     out_sample_rate, None)

    if in_sample_width != out_sample_width:
        # we're converting the audio into 24-bit
        # workaround for https://bugs.python.org/issue12866
        if out_sample_width == 3:
            # convert audio into 32-bit first, which is always supported
            raw_data = audioop.lin2lin(raw_data, in_sample_width, 4)
            try:
                # test whether 24-bit audio is supported
                audioop.bias(b"", 3, 0)
            except audioop.error:
                # this version of audioop doesn't support 24-bit audio;
                # since we're in little endian, discard the first byte of
                # each 32-bit sample to get a 24-bit sample
                raw_data = b"".join(raw_data[i + 1:i + 4]
                                    for i in range(0, len(raw_data), 4))
            else:
                # The data is 32-bit at this point, so narrow from width 4.
                # Using in_sample_width here would misread the buffer.
                raw_data = audioop.lin2lin(raw_data, 4, out_sample_width)
        else:
            raw_data = audioop.lin2lin(raw_data, in_sample_width,
                                       out_sample_width)

    # if the output is 8-bit audio with unsigned samples,
    # convert the samples we've been treating as signed to unsigned again
    if out_sample_width == 1:
        # add 128 to every sample to make them act like unsigned samples again
        raw_data = audioop.bias(raw_data, 1, 128)

    return raw_data
def mic_to_ws():
    """Stream microphone chunks to the websocket until an error occurs.

    Reads ``self.chunk`` frames at a time from ``stream``, replaces chunks
    whose RMS is below ``self.audio_gate`` with silence, resamples to
    ``self.byterate`` when it differs from ``sample_rate`` and sends the
    data. The connection is closed afterwards.
    """
    # uses stream
    try:
        # The original passed sys.stderr as a positional argument, which
        # printed the stream object to stdout.
        sys.stderr.write("\nLISTENING TO MICROPHONE\n")
        last_state = None
        while True:
            data = stream.read(self.chunk)
            if self.audio_gate > 0:
                rms = audioop.rms(data, 2)
                if rms < self.audio_gate:
                    # Keep the payload as bytes; a str of NULs cannot be
                    # resampled on Python 3.
                    data = b'\x00' * len(data)
            # if sample_chan == 2:
            #     data = audioop.tomono(data, 2, 1, 1)
            if sample_rate != self.byterate:
                (data, last_state) = audioop.ratecv(
                    data, 2, 1, sample_rate, self.byterate, last_state)
            self.send_data(data)
    except IOError as e:
        # usually a broken pipe
        print(e)
    except AttributeError:
        # currently raised when the socket gets closed by main thread
        pass

    # to voluntarily close the connection, we would use
    # self.send_data("")
    # self.send("EOS")
    try:
        self.close()
    except IOError:
        pass
def main():
    """Print a direction-of-arrival estimate for each microphone chunk.

    Captures 2-channel int16 audio at 48000 Hz, estimates the delay
    between channel 0 and channel 1 with ``gcc_phat`` on Hann-windowed
    chunks and prints the corresponding angle in degrees. SIGINT closes
    the microphone.
    """
    sample_rate = 48000
    channels = 2
    N = 4096 * 4

    mic = Microphone(sample_rate, channels)
    window = np.hanning(N)

    sound_speed = 343.2
    distance = 0.14
    # Largest delay possible for the given microphone spacing.
    max_tau = distance / sound_speed

    def signal_handler(sig, num):
        print('Quit')
        mic.close()

    signal.signal(signal.SIGINT, signal_handler)

    for data in mic.read_chunks(N):
        # frombuffer replaces the deprecated binary mode of np.fromstring.
        # The original also resampled channel 0 to 16 kHz but never used
        # the result, so that dead work has been dropped.
        buf = np.frombuffer(data, dtype='int16')
        tau, _ = gcc_phat(buf[0::channels] * window,
                          buf[1::channels] * window,
                          fs=sample_rate, max_tau=max_tau)
        theta = math.asin(tau / max_tau) * 180 / math.pi
        print('\ntheta: {}'.format(int(theta)))
def load_wav(file_name, def_sample_width=2, def_sample_rate=16000):
    """Reads all audio data from the file and returns it in as bytes.

    The content is re-sampled into the default sample rate.

    :param file_name: path or file object accepted by ``wave.open``.
    :param def_sample_width: required sample width in bytes.
    :param def_sample_rate: sample rate of the returned data.
    :returns: mono PCM bytes at ``def_sample_rate``.
    :raises Exception: if the file is not mono, has a different sample
        width, or ends unexpectedly.
    """
    try:
        wf = wave.open(file_name, 'r')
        try:
            if wf.getnchannels() != 1:
                raise Exception('Input wave is not in mono')
            if wf.getsampwidth() != def_sample_width:
                raise Exception('Input wave is not in %d Bytes'
                                % def_sample_width)
            sample_rate = wf.getframerate()

            # read all the samples; collect chunks and join once instead
            # of growing a bytes object (and never mix str with bytes)
            chunk = 1024
            parts = []
            pcm_part = wf.readframes(chunk)
            while pcm_part:
                parts.append(pcm_part)
                pcm_part = wf.readframes(chunk)
        finally:
            # Close on every path, not only on success.
            wf.close()
    except EOFError:
        raise Exception('Input PCM is corrupted: End of file.')

    pcm = b''.join(parts)

    # resample audio if not compatible
    if sample_rate != def_sample_rate:
        import audioop
        # The width was validated above, so use it rather than a literal 2.
        pcm, _ = audioop.ratecv(pcm, def_sample_width, 1, sample_rate,
                                def_sample_rate, None)
    return pcm
def convert_framerate(fragment, width, nchannels, framerate_in, framerate_out):
    """
    Resample an audio fragment to a different framerate.

    Parameters
    ----------
    fragment : bytes object
        The fragment to convert.
    width : int
        Sample width of the fragment.
    nchannels : int
        Number of channels in the fragment.
    framerate_in : int
        Framerate the fragment currently has.
    framerate_out : int
        Framerate the result should have.

    Returns
    -------
    bytes
        The input object itself when both framerates are equal,
        otherwise the resampled data.
    """
    if framerate_in != framerate_out:
        # ratecv returns (data, state); only the data is of interest.
        fragment = audioop.ratecv(fragment, width, nchannels,
                                  framerate_in, framerate_out, None)[0]
    return fragment
def _write_frames_to_file(self, frames, framerate, volume):
    """Write frames to a temporary WAV file and yield the open file.

    :param frames: iterable of PCM byte chunks in the input device's
        format.
    :param framerate: frame rate of the WAV file; the audio is resampled
        when the device rate differs.
    :param volume: ``None`` to leave levels alone, otherwise the samples
        are scaled by ``volume * 2**15 / max_sample``.
    :yields: the temporary file, rewound to the start. It is removed when
        the generator is resumed or closed.
    """
    device = self._input_device
    # audioop needs an int width; a float raises TypeError on Python 3.
    sample_width = int(device._input_bits / 8)

    with tempfile.NamedTemporaryFile(
            mode='w+b', suffix=".wav",
            prefix=datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) as f:
        wav_fp = wave.open(f, 'wb')
        wav_fp.setnchannels(device._input_channels)
        wav_fp.setsampwidth(sample_width)
        wav_fp.setframerate(framerate)

        # Join as bytes on both paths; the original used ''.join when
        # resampling, which fails for bytes chunks.
        fragment = b''.join(frames)
        if device._input_rate != framerate:
            fragment = audioop.ratecv(fragment, sample_width,
                                      device._input_channels,
                                      device._input_rate, framerate,
                                      None)[0]

        if volume is not None:
            maxvolume = audioop.minmax(fragment, sample_width)[1]
            # Skip scaling when there is no positive peak (silence or an
            # empty buffer) instead of dividing by zero.
            if maxvolume > 0:
                fragment = audioop.mul(fragment, sample_width,
                                       volume * (2.**15) / maxvolume)

        wav_fp.writeframes(fragment)
        wav_fp.close()
        f.seek(0)
        yield f
def encode(self, frame, force_keyframe=False):
    """Encode one s16 audio frame with Opus.

    The samples are resampled to ``SAMPLE_RATE`` when the frame's rate
    differs (rescaling the timestamp to match) and mono input is
    duplicated to stereo before being handed to ``opus_encode``.

    :returns: ``([payload], timestamp)``.
    """
    assert frame.format.name == 's16'
    assert frame.layout.name in ['mono', 'stereo']

    nchannels = len(frame.layout.channels)
    pcm = bytes(frame.planes[0])
    pts = frame.pts

    # resample at 48 kHz
    source_rate = frame.sample_rate
    if source_rate != SAMPLE_RATE:
        pcm, self.rate_state = audioop.ratecv(
            pcm, SAMPLE_WIDTH, nchannels,
            source_rate, SAMPLE_RATE, self.rate_state)
        pts = (pts * SAMPLE_RATE) // source_rate

    # convert to stereo
    if nchannels == 1:
        pcm = audioop.tostereo(pcm, SAMPLE_WIDTH, 1, 1)

    samples = ffi.cast('int16_t*', ffi.from_buffer(pcm))
    length = lib.opus_encode(self.encoder, samples, SAMPLES_PER_FRAME,
                             self.cdata, len(self.cdata))
    assert length > 0

    payload = self.buffer[0:length]
    return [payload], pts
def encode(self, frame: Frame,
           force_keyframe: bool = False) -> Tuple[List[bytes], int]:
    """Encode one s16 audio frame via ``self._convert``.

    The samples are resampled to ``SAMPLE_RATE`` when the frame's rate
    differs (rescaling the timestamp to match) and stereo input is summed
    to mono before conversion.

    :returns: ``([payload], timestamp)``.
    """
    assert isinstance(frame, AudioFrame)
    assert frame.format.name == "s16"
    assert frame.layout.name in ["mono", "stereo"]

    nchannels = len(frame.layout.channels)
    pcm = bytes(frame.planes[0])
    pts = frame.pts

    # resample at 8 kHz
    source_rate = frame.sample_rate
    if source_rate != SAMPLE_RATE:
        resampled = audioop.ratecv(pcm, SAMPLE_WIDTH, nchannels,
                                   source_rate, SAMPLE_RATE,
                                   self.rate_state)
        pcm, self.rate_state = resampled
        pts = (pts * SAMPLE_RATE) // source_rate

    # convert to mono
    if nchannels == 2:
        pcm = audioop.tomono(pcm, SAMPLE_WIDTH, 1, 1)

    payload = self._convert(pcm, SAMPLE_WIDTH)
    return [payload], pts
def play(self, data):
    """Play a WAV file through PyAudio at 48000 Hz.

    based on
    http://stackoverflow.com/questions/17657103/how-to-play-wav-file-in-python

    Each chunk of ``self.chunk`` frames is resampled from
    ``int(16000 * self.speed)`` Hz to the 48000 Hz output stream.

    :param data: path or file object accepted by ``wave.open``.
    """
    # open a wav format music
    f = wave.open(data, 'rb')
    # instantiate PyAudio
    p = pyaudio.PyAudio()
    try:
        width = f.getsampwidth()
        channels = f.getnchannels()
        # open stream
        stream = p.open(format=p.get_format_from_width(width),
                        channels=channels,
                        rate=48000,
                        output=True)
        try:
            # Carry the resampler state from chunk to chunk so chunk
            # boundaries are continuous; start afresh if the speed (and
            # therefore the input rate) changes mid-playback.
            state = None
            last_rate = None
            # read data
            frames = f.readframes(self.chunk)
            # play stream
            while len(frames) > 0:
                in_rate = int(16000 * self.speed)
                if in_rate != last_rate:
                    state = None
                    last_rate = in_rate
                # Resample with the file's own width and channel count,
                # matching the format the output stream was opened with.
                frames, state = audioop.ratecv(frames, width, channels,
                                               in_rate, 48000, state)
                stream.write(frames)
                frames = f.readframes(self.chunk)
        finally:
            # stop stream
            stream.stop_stream()
            stream.close()
    finally:
        # close PyAudio and the file even when playback fails
        p.terminate()
        f.close()
def resample_to_def_sample_rate(pcm, sample_rate, def_sample_rate):
    """Return 16-bit mono ``pcm`` resampled to ``def_sample_rate``.

    The input is returned unchanged when both rates are already equal.
    """
    if sample_rate == def_sample_rate:
        return pcm
    import audioop
    # ratecv returns (data, state); the state is not needed here.
    return audioop.ratecv(pcm, 2, 1, sample_rate, def_sample_rate, None)[0]
def downsampleWav(data, dst, inrate=sampleRate, outrate=16000, inchannels=1, outchannels=1):
    """Resample 16-bit PCM data and write it to a WAV file.

    :param data: raw PCM bytes.
    :param dst: path or file object for the output WAV file.
    :param inrate: frame rate of ``data``.
    :param outrate: frame rate of the output.
    :param inchannels: channel count of ``data``.
    :param outchannels: channel count written to the header. When it is 1
        and the input is not mono, only the left channel is kept.
    :returns: ``True`` on success, ``False`` on any failure (a message is
        printed).
    """
    try:
        # Unpack straight away so `converted` is always bytes. The
        # original kept the (data, state) tuple and later indexed the
        # mono bytes with [0], writing a single element.
        converted, _ = audioop.ratecv(data, 2, inchannels, inrate, outrate,
                                      None)
        if outchannels == 1 and inchannels != 1:
            converted = audioop.tomono(converted, 2, 1, 0)
    except Exception:
        print('Failed to downsample wav')
        return False

    # Opened after conversion so a failed conversion neither leaks the
    # handle nor leaves an empty file behind.
    try:
        s_write = wave.open(dst, 'wb')
    except Exception:
        print('Failed to open files!')
        return False

    written = True
    try:
        s_write.setparams((outchannels, 2, outrate, 0, 'NONE',
                           'Uncompressed'))
        s_write.writeframes(converted)
    except Exception as e:
        print(e)
        print('Failed to write wav')
        written = False

    try:
        s_write.close()
    except Exception:
        print('Failed to close wav files')
        return False

    return written
def MumbleSoundReceivedHandler(user, soundchunk):
    """Forward a received Mumble sound chunk to ``AudioSlot1``.

    The chunk's PCM is resampled as 16-bit mono from 48000 Hz to
    ``AudioSlot1.PCMSAMPLERATE``, scaled by ``MumbleVolume`` unless that
    is 1, and played with the configured call type and destination id.

    :param user: sender of the chunk (not used).
    :param soundchunk: object whose ``pcm`` attribute holds the audio.
    """
    # Every chunk is converted on its own; no resampler state is kept.
    converted = audioop.ratecv(soundchunk.pcm, 2, 1, 48000,
                               AudioSlot1.PCMSAMPLERATE, None)
    pcm = converted[0]

    if MumbleVolume != 1:
        pcm = audioop.mul(pcm, 2, MumbleVolume)

    AudioSlot1.playBuffer(pcm, DMR_CallType, DMR_DstId)
def convert_to(self, data: bytes, to_depth: int, to_channels: int,
               to_rate: int, to_unsigned: bool = False) -> bytes:
    """Convert audio data.

    :param data: PCM bytes in this object's depth, channels and rate.
    :param to_depth: target bit depth; the byte width is ``to_depth // 8``.
    :param to_channels: target channel count (mono <-> stereo only).
    :param to_rate: target sample rate.
    :param to_unsigned: whether the output should use the opposite
        signedness convention from ``self._unsigned``.
    :returns: the converted bytes. The resampler state is kept in
        ``self._state`` between calls.
    """
    dest_width = to_depth // 8

    if self._depth != to_depth:
        if self._depth == 8:
            data = audioop.bias(data, 1, 128)
        data = audioop.lin2lin(data, self._width, dest_width)
        if to_depth == 8:
            # The original called the module itself here, which raised
            # TypeError; the intended call is bias().
            data = audioop.bias(data, 1, 128)

    if self._unsigned != to_unsigned:
        data = audioop.bias(data, dest_width, 128)

    # Make it stereo
    if self._channels < to_channels:
        data = audioop.tostereo(data, dest_width, 1, 1)
    # Make it mono
    elif self._channels > to_channels:
        data = audioop.tomono(data, dest_width, 1, 1)

    # Convert the sample rate of the data to the requested rate.
    if self._rate != to_rate and data:
        data, self._state = audioop.ratecv(data, dest_width, to_channels,
                                           self._rate, to_rate,
                                           self._state, 2, 1)

    return data
def record(self, firstChunk=None):
    """Record until the input stays quiet for ``self.timeoutLength``.

    Chunks are read from ``self.stream``; every chunk whose RMS reaches
    ``self.audioThreshold`` pushes the deadline back by
    ``self.timeoutLength`` seconds.

    :param firstChunk: optional chunk to place before the recording.
    :returns: the recording resampled from ``RATE`` to 16000 Hz as 16-bit
        mono.
    """
    print('Recording beginning')
    captured = [firstChunk] if firstChunk else []

    now = time.time()
    deadline = time.time() + self.timeoutLength
    while now <= deadline:
        piece = self.stream.read(chunk)
        if self.rms(piece) >= self.audioThreshold:
            deadline = time.time() + self.timeoutLength
        now = time.time()
        captured.append(piece)

    resampled, _ = audioop.ratecv(b"".join(captured), 2, 1, RATE, 16000,
                                  None)
    return resampled
def add_wav_sound(self, name):
    """ Adds a WAV file to the sound library

    The sound is stored under ``name`` up to its first ``'.'``. Samples
    are resampled to the library bitrate when the file's rate differs.

    :param name: path of the WAV file.
    :returns: ``True`` if the sound was added, ``False`` if it is already
        in the library.
    :raises IOError: if the file does not exist.
    """
    key = name.split('.')[0]
    # Sounds are stored under `key`, so duplicates must be detected with
    # it; checking only the full name never matched and re-added the same
    # file on every call.
    if name in self.sounds or key in self.sounds:
        return False

    if not os.path.exists(name):
        raise IOError("File %s don't exists" % name)

    normalize = 0.85 if self.__btc_codec == 'DM' else 0.5
    sr, samples, info = read_wav(name, normalize)

    # Resample to lib bitrate
    if sr != self.__bitrate:
        samples, _ = audioop.ratecv(samples, BITS, 1, sr,
                                    self.__bitrate, None)

    self.sounds[key] = {'inputwave': samples, 'resultwave': None,
                        'bitstream': None, 'info': info}
    self.__snames.append(key)
    return True
def __init__(self, fname, newrate=0):
    """Initializes an audio file from an uncompressed wavefile on disk.

    The file is converted to mono, and if newrate is positive, then the
    rate is converted. ``self.dat`` ends up holding the absolute value of
    every sample (as a numpy array when numpy is importable, otherwise a
    list); ``self.width`` and ``self.fps`` describe the data.

    :param fname: path of the WAV file.
    :param newrate: target frame rate, or 0 to keep the file's rate.
    """
    import wave
    import audioop
    try:  # see if we have numpy
        from numpy import array
        self.numpy = 1
    except ImportError:
        self.numpy = 0

    # read data
    f = wave.open(fname, "rb")
    try:
        nchans, w, fps, nframes = f.getparams()[:4]
        print("Read audio file %s with %d chans, %d width, %d fps and %d frames"
              % (fname, nchans, w, fps, nframes))
        self.width, self.fps = w, fps
        self.dat = f.readframes(nframes)
    finally:
        f.close()
    print(" Original data length was %d" % (len(self.dat)))

    # convert to mono and (optionally) convert the rate. Only stereo data
    # is down-mixed: tomono on a mono file would average neighbouring
    # samples and halve the data.
    if nchans == 2:
        self.dat = audioop.tomono(self.dat, w, 0.5, 0.5)
    print(" After mono, data length is %d" % (len(self.dat)))
    if newrate > 0:
        self.dat, junk = audioop.ratecv(self.dat, w, 1, fps, newrate, None)
        self.fps = newrate
        print(" Converted to new rate %s, and data length is now %d"
              % (self.fps, len(self.dat)))

    # now extract the data into a simple array
    from audioop import getsample
    self.dat = [abs(getsample(self.dat, w, i))
                for i in range(len(self.dat) // w)]
    print(" Final data length is now of length %s" % (len(self.dat),))
    if self.numpy:
        self.dat = array(self.dat)
def write(self, audio):
    """Queue ``audio`` for transmission, converting it first if asked.

    When ``self._user_resample`` is set, the audio is optionally decoded
    from u-law to 16-bit linear, resampled to 8000 Hz if its configured
    rate differs, and down-mixed to mono if it has two channels. The
    result is put on ``self._tx_q``.
    """
    opts = self._user_resample
    if opts:
        # The user can also specify to have ULAW encoded source audio
        # converted to linear encoding upon being written.
        if opts.ulaw2lin:
            audio = audioop.ulaw2lin(audio, 2)

        # If the audio isn't already sampled at 8KHz,
        # then it needs to be downsampled first
        if opts.rate != 8000:
            converted = audioop.ratecv(audio, 2, opts.channels, opts.rate,
                                       8000, opts.ratecv_state)
            audio, opts.ratecv_state = converted

        # If the audio isn't already in mono, then
        # it needs to be downmixed as well
        if opts.channels == 2:
            audio = audioop.tomono(audio, 2, 1, 1)

    self._tx_q.put(audio)
def transpose(snd, amount, chans=2):
    """ Change the speed of a sound.

    An ``amount`` of 1.0 leaves the sound unchanged, 0.5 plays at half
    speed, 2.0 at twice the speed, and so on. The work is done by
    ``audioop.ratecv`` using the sample width and frame rate found in
    ``audio_params``.
    """
    width = audio_params[1]
    base_rate = audio_params[2]
    # A faster sound needs fewer output frames, hence the reciprocal.
    target_rate = int(base_rate * (1.0 / float(amount)))
    converted, _ = audioop.ratecv(snd, width, chans, base_rate,
                                  target_rate, None)
    return converted
def findfit(scratch_frag, final_frag, sound_file):
    '''Calculates the offset (in seconds) between scratch_frag & final_frag.

    Both fragments are assumed to contain the same, loud "clapper" event.
    The SoundFile object is used for common stream parameters.

    :param scratch_frag: 16-bit PCM fragment to search in.
    :param final_frag: 16-bit PCM fragment containing the reference event.
    :param sound_file: object whose ``stream`` supplies channel count,
        frame rate and sample width (which must be 2).
    :returns: the shift in seconds: the matched position in
        ``scratch_frag`` minus the needle position in ``final_frag``.
    '''
    import audioop
    nchannels = sound_file.stream.getnchannels()
    framerate = sound_file.stream.getframerate()
    width = sound_file.stream.getsampwidth()
    assert(width == 2)

    # Simplify the sound streams to make it quicker to find a match.
    # Left channel only.
    if nchannels > 1:
        scratch_frag_ = audioop.tomono(scratch_frag, width, 1, 0)
        final_frag_ = audioop.tomono(final_frag, width, 1, 0)
    else:
        scratch_frag_ = scratch_frag
        final_frag_ = final_frag
    nchannels_ = 1

    # Downsample to 8000/sec
    framerate_ = 8000
    scratch_frag_, state = audioop.ratecv(
        scratch_frag_, width, nchannels_, framerate, framerate_, None)
    final_frag_, state = audioop.ratecv(
        final_frag_, width, nchannels_, framerate, framerate_, None)
    bytes_per_second_ = nchannels_ * framerate_ * width

    # Find the clapper in final. The window is 0.001 s worth of samples
    # at the original frame rate (the old comment said 0.1 sec).
    # NOTE(review): the data is at framerate_ by now -- confirm whether
    # the window should be sized with framerate_ instead.
    length_samples = int(0.001 * framerate * nchannels_)
    final_off_samples = audioop.findmax(final_frag_, length_samples)

    # Search for a 2 second 'needle' centred on where we found the 'clapper'
    needle_bytes = 2 * bytes_per_second_
    b0 = max(0, final_off_samples * width - int(needle_bytes / 2))
    print('"clapper" at final: %s sec' % (1.0 * b0 / bytes_per_second_))
    b1 = b0 + needle_bytes
    final_clapper_frag = final_frag_[b0:b1]
    scratch_off_samples, factor = audioop.findfit(scratch_frag_,
                                                  final_clapper_frag)
    scratch_off_bytes = scratch_off_samples * width
    print('match at scratch: %s sec  factor = %s'
          % (1.0 * scratch_off_bytes / bytes_per_second_, factor))

    # Calculate the offset (shift) between the two fragments.
    shift_sec = (scratch_off_bytes - b0) * 1.0 / bytes_per_second_
    print('shift = %s seconds' % (shift_sec,))
    return shift_sec
def transpose(audio_string, amount):
    """Transpose an audio fragment by a given amount.

    1.0 is unchanged, 0.5 is half speed, 2.0 is twice speed, etc.

    audio_string -- raw audio fragment to convert
    amount       -- speed factor; it is inverted, so it must not be zero

    The sample width, channel count and frame rate are taken from the
    module-level ``audio_params`` (indices 1, 0 and 2).  Returns the
    converted fragment only; the ratecv state is dropped.
    """
    speed_ratio = 1.0 / float(amount)
    sample_width = audio_params[1]
    channel_count = audio_params[0]
    source_rate = audio_params[2]
    target_rate = int(audio_params[2] * speed_ratio)
    converted, _state = audioop.ratecv(
        audio_string, sample_width, channel_count,
        source_rate, target_rate, None)
    return converted
def set_frame_rate(self, frame_rate):
    """Return this audio converted to ``frame_rate``.

    When the requested rate equals the current ``self.frame_rate`` the
    object itself is returned.  Otherwise the raw ``self._data`` is
    resampled with ``audioop.ratecv`` and the result of ``self._spawn`` is
    returned, called with the new data and a ``frame_rate`` override.
    """
    if frame_rate == self.frame_rate:
        return self

    # Only the converted fragment is needed; the ratecv state is dropped.
    resampled = audioop.ratecv(
        self._data,
        self.sample_width,
        self.channels,
        self.frame_rate,
        frame_rate,
        None,
    )[0]
    new_settings = {'frame_rate': frame_rate}
    return self._spawn(data=resampled, overrides=new_settings)
def resample(self, rate, new_rate=16000):
    """Return the stored frames converted to another frame rate.

    :param rate: (int) current frame rate of the frames
    :param new_rate: (int) frame rate wanted for the result
    :returns: (str) converted frames

    The stored frames are left untouched and the ratecv state is dropped.

    """
    converted, _state = audioop.ratecv(
        self._frames, self._sampwidth, self._nchannels,
        rate, new_rate, None)
    return converted
def chunks(self, pcm, sample_rate):
    """Yield ``(original, resampled)`` pairs for consecutive slices of pcm.

    ``pcm`` is cut into slices of ``self.buffer_length`` bytes (the last one
    may be shorter).  Each slice is converted with ``audioop.ratecv`` as
    2-byte, single-channel audio from ``sample_rate`` to
    ``self.default_sample_rate``.  The conversion state is kept in
    ``self.state`` so that it carries over between slices and between calls.

    For empty input a single ``(b"", b"")`` pair is yielded and
    ``self.state`` is left alone.
    """
    if len(pcm) == 0:
        yield b"", b""
    else:
        # range() instead of xrange(): same iteration, and it also exists
        # on Python 3.
        for start in range(0, len(pcm), self.buffer_length):
            original_pcm = pcm[start:start + self.buffer_length]
            resampled_pcm, self.state = audioop.ratecv(
                original_pcm, 2, 1,
                sample_rate, self.default_sample_rate, self.state)
            yield original_pcm, resampled_pcm
def convert_sample_rate(self, out_rate):
    """Resample the audio data to ``out_rate`` and store the result.

    ``self.BYTE_DATA`` is converted with ``audioop.ratecv`` from
    ``self.SAMPLE_RATE`` to ``out_rate``; the new rate and the converted
    bytes are then written to the private sample-rate and byte-data
    attributes.  Nothing is returned.
    """
    # NOTE(review): BIT_WIDTH is passed as ratecv's sample width, which
    # audioop expects in bytes -- confirm the attribute holds bytes.
    conversion = audioop.ratecv(
        self.BYTE_DATA, self.BIT_WIDTH, self.CHANNELS,
        self.SAMPLE_RATE, out_rate, None)
    self.__sample_rate = out_rate
    self.__byte_data = conversion[0]
def resample(self, rate, newrate):
    """ Convert the stored frames to another framerate.

    @param rate (int) current framerate of the frames
    @param newrate (int) framerate wanted for the result
    @return converted frames

    The stored frames are not modified; the ratecv state is dropped.

    """
    converted, _state = audioop.ratecv(
        self.frames, self.sampwidth, self.nchannels,
        rate, newrate, None)
    return converted
def read(self, buf, source_channels):
    """Convert a received audio buffer and hand it to the listener.

    The first three bytes of ``buf`` are skipped (presumably a packet
    header -- confirm against the sender).  The rest is converted with
    ``audioop.ratecv`` from 48000 Hz to ``self.listener.sample_rate``, passed
    through ``audioop.tomono`` with both factors 0.5, and delivered to
    ``self.listener.read``.

    An ``audioop.error`` raised while doing so is logged as a warning and
    otherwise ignored, so a broken packet is dropped instead of raising.
    """
    # NOTE(review): this is the paInt16 sample size multiplied by the
    # channel count, and it is passed to ratecv as the per-sample width
    # together with source_channels -- confirm that is intended.
    source_sample_width = pyaudio.get_sample_size(pyaudio.paInt16) * source_channels
    audio = buf[3:]
    try:
        # sometimes the data received is incomplete so reusing state
        # data from ratecv() sometimes results in errors
        (audio, _) = audioop.ratecv(audio, source_sample_width, source_channels, 48000, self.listener.sample_rate, None)
        audio = audioop.tomono(audio, self.listener.sample_width, 0.5, 0.5)
        self.listener.read(audio)
    except audioop.error:
        # exc_info=True already records the exception, so it is not bound.
        logger.warn("Error preparing sample", exc_info=True)
def write(self, in_array):
    """Resample ``in_array`` and append the result to the ring buffer.

    The input is converted with ``audioop.ratecv`` as 2-byte samples with
    ``CHANNELS`` channels from ``self.in_hz`` to ``self.out_hz``.  The
    converted fragment is turned into a ``numarray`` Int16 array and written
    through ``ringbuffer.RingBuffer.write``.
    """
    fragment, new_state = audioop.ratecv(in_array, 2, CHANNELS,
                                         self.in_hz, self.out_hz, self._state)
    # The state used to be read from ``_state`` but stored in ``state``, so
    # the stored value was apparently never fed back into the next call.
    # Store it under both names: ``_state`` so the conversion continues
    # across calls, ``state`` for code that already reads that attribute.
    # NOTE(review): confirm ``state`` is not a property wrapping ``_state``.
    self._state = new_state
    self.state = new_state
    ar = numarray.fromstring(fragment, type=numarray.Int16)
    ringbuffer.RingBuffer.write(self, ar)
def resample(self, samplerate):
    """
    Convert the sample to a different sample rate while keeping the pitch
    and duration of the sound.
    The algorithm used is simple, and it will cause a loss of sound quality.

    The object must not be locked.  The conversion happens in place and
    ``self`` is returned; asking for the current rate changes nothing.
    """
    assert not self.__locked
    if samplerate == self.__samplerate:
        return self
    # Only the converted frames are kept; the ratecv state is dropped.
    converted, _state = audioop.ratecv(
        self.__frames, self.samplewidth, self.nchannels,
        self.samplerate, samplerate, None)
    self.__frames = converted
    self.__samplerate = samplerate
    return self
def transpose(snd, amount, chans=None):
    """Change the speed of a sound by resampling it.

    1.0 is unchanged, 0.5 is half speed, 2.0 is twice speed, etc.
    This is a wrapper for ``audioop.ratecv`` in the standard library.

    snd    -- raw audio fragment to convert
    amount -- speed factor; it is inverted, so it must not be zero
    chans  -- channel count; the module-level ``CHANNELS`` when omitted

    Returns the converted fragment; the ratecv state is discarded.
    """
    channel_count = CHANNELS if chans is None else chans
    # A faster sound needs fewer output samples, hence the reciprocal.
    speed_ratio = 1.0 / float(amount)
    target_rate = int(SAMPLING_RATE * speed_ratio)
    converted, _state = audioop.ratecv(
        snd, BIT_WIDTH, channel_count, SAMPLING_RATE, target_rate, None)
    return converted
def convert_wave_data(f_rate,frame_count,sample_width,channels,data):
    """ Convert wave sample data into pleo format

    The data is processed with audioop in this order: optional down-mix to
    one channel, attenuation, conversion to 11025 Hz, optional widening of
    1-byte samples to 2 bytes, scaling by 1/256 and ADPCM encoding.

    Returns the tuple (11025, frame_count, sample_width, 1, data), where
    frame_count and sample_width are the values passed in and data is the
    ADPCM-encoded result.
    """
    # Two channels: add both channels (factor 1 each) into a single one.
    if channels==2:
        data = audioop.tomono(data,sample_width,1,1)
    # Slight attenuation before resampling.
    data = audioop.mul(data,sample_width,0.97999999999999998)
    # Convert to 11025 Hz; the trailing 4, 4 are ratecv's weightA/weightB
    # filter arguments. The ratecv state is discarded.
    data = audioop.ratecv(data,sample_width,1,f_rate,11025,None,4,4)[0]
    # 1-byte samples: shift by -128, then widen to 2-byte samples.
    if sample_width==1:
        data = audioop.bias(data,1,-128)
        data = audioop.lin2lin(data,1,2)
    # NOTE(review): from here on the data is treated as 2-byte samples; this
    # scaling is assumed to apply to every input width, not only to the
    # 1-byte branch above -- confirm against the original layout.
    data = audioop.mul(data,2,(1.0/256))
    # Encode as ADPCM; the encoder state is discarded.
    data = audioop.lin2adpcm(data,2,None)[0]
    return (11025,frame_count,sample_width,1,data)
def pulsaret(slice, params, index):
    """Pan one slice of audio and resample it according to ``params``.

    ``params['amp']`` and ``params['freq']`` are each read as
    ``(low, high, curve)``: the value used is ``curve[index]`` scaled into
    the ``low``..``high`` range.  ``params['pan_pos']`` and the computed
    amplitude are passed to ``pan``.

    The target rate is the frame rate in ``audio_params[2]`` divided by the
    computed frequency width.  When it equals the current rate the panned
    slice is returned as is; otherwise it is converted with
    ``audioop.ratecv`` and only the converted fragment is returned.
    """
    amp = ((params['amp'][1] - params['amp'][0]) * params['amp'][2][index]) + params['amp'][0]
    slice = pan(slice, params['pan_pos'], amp)

    freq_width = params['freq'][2][index] * (params['freq'][1] - params['freq'][0]) + params['freq'][0]
    target_rate = int(audio_params[2] * (1.0 / float(freq_width)))

    if target_rate == audio_params[2]:
        return slice

    # audio_params[1] is the sample width and audio_params[0] the channel
    # count, matching how the transpose() helpers index audio_params; the
    # two were passed in swapped order here.
    # cap() presumably limits the rate to the given bounds -- confirm.
    converted = audioop.ratecv(
        slice, audio_params[1], audio_params[0], audio_params[2],
        cap(target_rate, 2147483647, dsp_grain), None)
    return converted[0]
def resample(frames, sampwidth, nchannels, rate, newrate):
    """ Convert frames to another framerate.

    @param frames (string) input frames.
    @param sampwidth (int) sample width of the frames.
    @param nchannels (int) number of channels in the samples
    @param rate (int) current framerate of the frames
    @param newrate (int) framerate wanted for the result
    @return converted frames

    The conversion starts without a previous ratecv state, and the state
    it produces is dropped.

    """
    converted, _state = audioop.ratecv(
        frames, sampwidth, nchannels, rate, newrate, None)
    return converted
def _split_and_resample_wav(origAudio, start_time, stop_time, new_wav_file): nChannels = origAudio.getnchannels() sampleWidth = origAudio.getsampwidth() frameRate = origAudio.getframerate() origAudio.setpos(int(start_time * frameRate)) chunkData = origAudio.readframes(int((stop_time - start_time) * frameRate)) # by doubling the frame-rate we effectively go from 8 kHz to 16 kHz chunkData, _ = audioop.ratecv(chunkData, sampleWidth, nChannels, frameRate, 2 * frameRate, None) chunkAudio = wave.open(new_wav_file, "w") chunkAudio.setnchannels(nChannels) chunkAudio.setsampwidth(sampleWidth) chunkAudio.setframerate(2 * frameRate) chunkAudio.writeframes(chunkData) chunkAudio.close()
def speed(self, speed):
    """
    Change the playback speed of the sample while keeping its sample rate.
    This will change the pitch and duration of the sound accordingly.
    The algorithm used is simple, and it will cause a loss of sound quality.

    The object must not be locked and ``speed`` must be positive.  The
    change happens in place and ``self`` is returned; a speed of exactly
    1.0 changes nothing.
    """
    assert not self.__locked
    assert speed > 0
    if speed == 1.0:
        return self
    rate = self.samplerate
    # Pretend the frames were recorded at rate*speed and convert them back
    # to the real rate, which stretches or squeezes the sound.
    pretended_rate = int(self.samplerate * speed)
    converted, _state = audioop.ratecv(
        self.__frames, self.samplewidth, self.nchannels,
        pretended_rate, rate, None)
    self.__frames = converted
    self.__samplerate = rate
    return self
def _ratecv(self, sounds):
    """Convert every sound to ``self.rate`` and return the converted list.

    ``sounds`` is paired element by element with ``self.ratecv_state``;
    each sound is converted as single-channel audio with a sample width of
    ``self.bytes``.  The state left by the last conversion of each sound is
    written back to ``self.ratecv_state`` at the same index.

    Returns a list with one converted fragment per processed sound.
    """
    output = []
    for n, (sound, state) in enumerate(zip(sounds, self.ratecv_state)):
        while True:
            # The input rate is derived from the sound's own length:
            # its byte count per tick_time, divided by the sample width.
            o, state = ratecv(sound, self.bytes, 1, int(round(len(sound) / self.tick_time) / self.bytes), self.rate, state)
            #Length may be off by one, so process until OK
            # Each retry feeds the same sound in again with the state
            # returned by the previous attempt.
            # NOTE(review): nothing bounds the number of retries -- confirm
            # the expected length is always reached.
            if len(o) == int(round(self.rate * self.tick_time * self.bytes)):
                break
        output.append(o)
        self.ratecv_state[n] = state
    return output
def startSound(audio_array, in_sample_rate, in_channels, out_sample_rate=22050, out_channels=2):
    """Resample a buffer of 2-byte samples and start playing it.

    audio_array     -- raw audio bytes with 2-byte samples
    in_sample_rate  -- frame rate of ``audio_array``
    in_channels     -- channel count of ``audio_array``
    out_sample_rate -- frame rate used for playback (default 22050)
    out_channels    -- channel count used for playback (default 2)

    The data is converted to ``out_sample_rate`` with ``audioop.ratecv``.
    Stereo input is mixed down for mono output and mono input is duplicated
    for stereo output.  The mixer is then initialised with the output
    format and playback of the resulting sound is started.  Nothing is
    returned.
    """
    import audioop

    pcm = audioop.ratecv(audio_array, 2, in_channels, in_sample_rate, out_sample_rate, None)[0]
    if in_channels == 2 and out_channels == 1:
        # tomono() returns the fragment itself, not a tuple; indexing it
        # with [0] used to leave a single byte.
        pcm = audioop.tomono(pcm, 2, 0.5, 0.5)
    elif in_channels == 1 and out_channels == 2:
        # Without this, the reshape below would pair up consecutive mono
        # samples as if they were left/right.
        pcm = audioop.tostereo(pcm, 2, 1, 1)

    samples = numpy.frombuffer(pcm, numpy.int16)
    if out_channels > 1:
        # Floor division: reshape() needs integer dimensions.
        samples = samples.reshape((len(samples) // out_channels, out_channels))

    pg.mixer.init(frequency=out_sample_rate, size=-16, channels=out_channels)
    if DEBUG:
        print(pg.mixer.get_init())
    sound = pg.sndarray.make_sound(samples)
    sound.play()
def convert(read, write):
    """Copy all audio from ``read`` to ``write`` as 8000 Hz mono.

    read  -- open audio reader; all of its frames are consumed
    write -- open audio writer; its parameters are set to one channel,
             2-byte samples, 8000 Hz, uncompressed

    The frames are converted to 8000 Hz with ``audioop.ratecv``.  Input with
    more than one channel is reduced to its left channel.  Both objects are
    closed before returning, also when the conversion raises.
    """
    try:
        write.setparams((1, 2, 8000, 0, 'NONE', 'not compressed'))
        source_rate = read.getframerate()
        source_channels = read.getnchannels()
        data = read.readframes(read.getnframes())
        # NOTE(review): the input is always treated as 2-byte samples; the
        # reader's own sample width is not consulted -- confirm callers only
        # pass 16-bit audio.
        converted = audioop.ratecv(data, 2, source_channels, source_rate, 8000, None)[0]
        if source_channels != 1:
            # Factors (1, 0): keep the left channel, drop the right one.
            converted = audioop.tomono(converted, 2, 1, 0)
        write.writeframes(converted)
    finally:
        # Close the writer even if closing the reader fails.
        try:
            read.close()
        finally:
            write.close()