def seek_audio(src_audio_path, seek_time):
    new_file_path = src_audio_path + '.seek_{}_seconds.wav'.format(seek_time)
    audioclip = AudioFileClip(src_audio_path)
    sound_array = np.array(audioclip.to_soundarray())
    sample_rate = audioclip.fps  # default is 44100 samples per second
    empty_sound_count = int(abs(seek_time) * sample_rate)
    empty_sounds = np.zeros((empty_sound_count, 2))
    # If seek_time > 0, shift the sound to the front and pad the back with silence.
    # If seek_time < 0, shift the sound to the back and pad the front with silence
    # (the sound at the back is lost).
    if seek_time > 0:
        sound_array = sound_array[empty_sound_count:len(sound_array)]
        sound_array = np.append(sound_array, empty_sounds)
    else:
        sound_array = sound_array[0:len(sound_array) - empty_sound_count]
        sound_array = np.append(empty_sounds, sound_array)
    sound_array = np.reshape(
        sound_array,
        (len(sound_array) // audioclip.nchannels, audioclip.nchannels))
    scaled = np.int16(sound_array / np.max(np.abs(sound_array)) * 32767)
    os.remove(src_audio_path)
    wav_write(new_file_path, sample_rate, scaled)
    return new_file_path
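# A minimal usage sketch for seek_audio, assuming a hypothetical 'input.wav'
# exists and that AudioFileClip (moviepy), np (numpy), os, and wav_write
# (scipy.io.wavfile.write) are imported as the function above expects:
if __name__ == '__main__':
    shifted = seek_audio('input.wav', 2)  # drop the first 2 s, pad silence at the end
    print('wrote', shifted)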
def jcamp2wav(fh, wavenme=None, rate=44100, secs=5):
    'convert a file handle to a .dx file into a .wav file'

    def sound_func(amp, hz, t):  # sound generation
        return amp * sin(hz * t)

    def mean(l):
        return sum(l) / len(l)

    dct = jcamp.jcamp_read(fh)
    x, y = dct['x'], dct['y']
    dif = y.max() - y.min()
    pk = peakdetect(y, x, lookahead=1, delta=dif / 10)
    max_peaks = pk[0]  # [[x0, y0], ..., [xn, yn]]
    waves = [[(_y - y.min()) / dif, MusicFreq.freq2octave(_x, 0)]
             for _x, _y in max_peaks]  # amp(0..1), freq in octave '0'
    waves.sort(reverse=True)  # keep the <= 10 most powerful peaks
    waves = waves[:10]
    pi2 = pi * 2
    # evaluate the average of all waves for each sample
    data = np.asarray([
        mean([sound_func(amp, hz, t) for amp, hz in waves])
        for t in np.arange(0, secs * pi2, pi2 / rate)
    ], dtype=np.float32)
    if not wavenme:
        wavenme = fh.name.replace('.dx', '.wav')
    wav_write(wavenme, rate, data)
def star_sound(fnme, wavenme=None, rate=44100, secs=5):  # from data[1]

    def sound_func(amp, hz, t):  # sound generation
        return amp * sin(hz * t) * sin(MusicFreq.freq2octave(hz, -7) * t)

    def mean(l):
        return sum(l) / len(l)

    irms, lrms, header, data = read_dat(fnme)
    y = data.T[1]
    x = data.T[0]
    dif = y.max() - y.min()
    pk = peakdetect(y, x, lookahead=1, delta=dif / 10)
    max_peaks = pk[0]  # [[x0, y0], ..., [xn, yn]]
    waves = [[(_y - y.min()) / dif, MusicFreq.freq2octave(_x, 0)]
             for _x, _y in max_peaks]  # amp(0..1), freq in octave '0'
    waves.sort(reverse=True)  # keep the <= 10 most powerful peaks
    waves = waves[:10]
    pi2 = pi * 2
    # evaluate the average of all waves for each sample
    datawav = np.asarray([
        mean([sound_func(amp, hz, t) for amp, hz in waves])
        for t in np.arange(0, secs * pi2, pi2 / rate)
    ], dtype=np.float32)
    if not wavenme:
        wavenme = fnme.replace('.dat', '.wav')
    wav_write(wavenme, rate, datawav)
def downsample(filename, outrate=8000, write_wav=False):
    rate, sig = wav.read(filename)
    down_sig = librosa.core.resample(sig, rate, outrate, scale=True)
    if not write_wav:
        return down_sig, outrate
    wav_write('{}_down_{}.wav'.format(filename, outrate), outrate, down_sig)
def downsample(filename, outrate=8000, write_wav=False):
    y, sr = librosa.load(filename, sr=22050)
    down_sig = librosa.core.resample(y, sr, outrate, scale=True)
    if not write_wav:
        return down_sig, outrate
    wav_write('{}_down_{}.wav'.format(filename, outrate), outrate, down_sig)
def main(path, out):
    img = cv2.imread(path)
    filters = build_filters()
    filtered = process(img, filters).swapaxes(0, 1)
    scaled = numpy.int16(filtered / numpy.max(numpy.abs(filtered)) * 32767)
    rv = numpy.ravel(scaled)
    wav_write(out, 44100, rv)
def write(self, name='temp'):
    wav_write(
        '{}{}'.format(name, '' if '.wav' in name else '.wav'),
        int(self.sample_rate),
        (self.data * (2. ** 15.)).astype(np.int16),
    )
def segment_opensmile_extraction(config_p, segment_signal, fs, temp_p):
    temp_seg_path = '/tmp/temp_segment.wav'
    int16_s = np.asarray(segment_signal * 32767, dtype=np.int16)
    wav_write(temp_seg_path, fs, int16_s)
    opensmile_feat_vec = opensmile_extract(config_p, temp_seg_path, temp_p)
    subprocess.call(['rm', temp_seg_path])
    return opensmile_feat_vec
def audiowrite(data, path, sample_rate=16000, normalize=False, threaded=True):
    """
    Write the audio data ``data`` to the wav file ``path``

    The file can be written in a threaded mode. In this case, the writing
    process will be started as a separate thread. Consequently, the file will
    not yet be written when this function exits.

    :param data: A numpy array with the audio data
    :param path: The wav file the data should be written to
    :param sample_rate: Sample rate of the audio data
    :param normalize: Normalize the audio first so that the values are within
        the range of [INTMIN, INTMAX], i.e. no clipping occurs
    :param threaded: If true, the write process will be started as a separate
        thread
    :return: The number of clipped samples
    """
    assert isinstance(path, (str, Path, io.BytesIO)), path
    assert data.dtype.kind in ['i', 'f'], (data.shape, data.dtype)

    if isinstance(path, Path):
        path = str(path)

    data = data.copy()
    if normalize:
        if not data.dtype.kind == 'f':
            data = data.astype(np.float64)
        data /= np.maximum(np.amax(np.abs(data)), 1e-6)

    if data.dtype.kind == 'f':
        data *= int16_max

    sample_to_clip = np.sum(data > int16_max)
    if sample_to_clip > 0:
        print('Warning, clipping {} sample{}.'.format(
            sample_to_clip, '' if sample_to_clip == 1 else 's'))
    data = np.clip(data, int16_min, int16_max)
    data = data.astype(np.int16)

    if threaded:
        threading.Thread(
            target=wav_write, args=(path, sample_rate, data)
        ).start()
    else:
        try:
            wav_write(path, sample_rate, data)
        except Exception:  # _struct.error
            if data.ndim == 2:
                assert data.shape[1] < 20, (
                    f"more than 20 channels looks wrong "
                    f"(shape: {data.shape}). "
                    f"Maybe you must call audiowrite(data.T, ...)"
                )
            raise
    return sample_to_clip
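# A minimal usage sketch for audiowrite, assuming the module-level int16_max /
# int16_min constants it references are np.iinfo(np.int16).max / .min, and
# using a hypothetical one-second 440 Hz test tone as input:
if __name__ == '__main__':
    t = np.linspace(0, 1, 16000, endpoint=False)
    tone = 0.5 * np.sin(2 * np.pi * 440 * t)
    clipped = audiowrite(tone, 'tone.wav', sample_rate=16000,
                         normalize=True, threaded=False)
    print('clipped samples:', clipped)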
def save_wav(Qsav, filename, fs):
    data = np.array([])
    print('\ngetting data\n')
    while True:
        chunk = Qsav.get()
        if chunk == "EOT":
            print('Saving output...')
            wav_write(filename, fs, data)
            Qsav.task_done()
            return
        else:
            data = np.append(data, chunk)  # append to the buffer
            print(len(data))
            Qsav.task_done()
def sound(u, ts, n_samps=1, name=None):
    fs = 10000
    env = np.exp(-0.03 * ts)  # exponentially decaying envelope
    u_env = u * env
    m = u.shape[0] - 1  # last row index (assumption: the original relied on an undefined `m`)
    samp_inds = np.round(np.linspace(0, m, n_samps + 2))
    audio = u_env[samp_inds[1:-1].astype(int), :]
    audio = np.sum(audio, axis=0)
    # plt.figure()
    # plt.plot(ts, audio / max(audio))
    sd.play(audio / max(audio), fs)
    # audio_out = np.asarray(audio / max(audio), dtype=np.int16)
    if name is not None:
        wav_write('outputs/{} audio.wav'.format(name), fs, audio / max(audio))
def _handle_listen(self, text):
    duration = int(text) // config.audio.chunk_duration if text.strip() else 0
    if len(self.analyzer.audio) > 0:
        with self.analyzer.lock:
            audio = np.concatenate(tuple(self.analyzer.audio)[-duration:])
            self.analyzer.audio.clear()
        wav_file = 'listen.wav'
        wav_write(wav_file, config.audio.sample_rate, audio)
        opus = check_output(['opusenc', wav_file, '-'])
        stream = BytesIO(opus)
        stream.seek(0)
        self._send('voice', stream)
    else:
        self._reply('No audio recorded yet')
def get_wav():
    out_file = 'out.wav'
    data = json.loads(request.data.decode('utf-8'))
    try:
        text = data['text']
    except KeyError:
        return json.dumps({'error': 'text not set'}), 400, {
            'ContentType': 'application/json'
        }
    spect = np.asanyarray([k.npvalue() for k in encoder.predict(text)])
    signal = vocoder.synthesize(spect)
    wav_write(out_file, 24000, signal)
    return send_file(out_file, mimetype='audio/wav')
def infer(filename):
    print('Start infer file: {}'.format(filename))
    # (wav, _) = read_wav(args.in_filepath)  # read wav from given file path
    (wav, _) = librosa.load(filename, 16000, mono=True)  # read wav from the given file path
    wav = np.asarray(np.multiply(wav, 32768.0), dtype=np.int16)
    print(max(wav), min(wav), np.mean(wav))
    print(wav.shape)
    input_feat = sess.run(net.infer_feat,
                          feed_dict={
                              net.s_ph: [wav],
                              net.s_len_ph: [len(wav)]
                          })  # sample of training set
    xi_bar_hat = sess.run(net.infer_output,
                          feed_dict={
                              net.input_ph: input_feat[0],
                              net.nframes_ph: input_feat[1],
                              net.training_ph: False
                          })  # output of network
    xi_hat = xi.xi_hat(xi_bar_hat, args.stats['mu_hat'], args.stats['sigma_hat'])
    y_MAG = np.multiply(input_feat[0],
                        gain.gfunc(xi_hat, xi_hat + 1, gtype=args.gain))
    y = np.squeeze(
        sess.run(net.y,
                 feed_dict={
                     net.y_MAG_ph: y_MAG,
                     net.x_PHA_ph: input_feat[2],
                     net.nframes_ph: input_feat[1],
                     net.training_ph: False
                 }))  # output of network
    if np.isnan(y).any():
        raise ValueError('NaN values found in enhanced speech.')
    if np.isinf(y).any():
        raise ValueError('Inf values found in enhanced speech.')
    y = np.asarray(np.multiply(y, 32768.0), dtype=np.int16)
    out_filepath = filename.replace('.' + filename.split('.')[-1], '_pred.wav')
    wav_write(out_filepath, args.f_s, y)
    print('Infer out file: {} done'.format(out_filepath))
    return out_filepath
def main():
    rte = RunTimeEncoder()
    rte.load('models')
    rtv = RunTimeVocoder()
    rtv.load('models')
    with open('texts.txt', 'rt', encoding='utf-8') as f:
        lines = f.readlines()
    for k in range(len(lines)):
        text = lines[k].strip()
        spect = np.asanyarray([t.npvalue() for t in rte.predict(text)])
        signal = rtv.synthesize(spect)
        wav_write('tests/test_%d.wav' % k, 24000, signal)
def __call__(self, wav=None, sr=None):
    assert len(wav.shape) == 1
    _wav = None
    try:
        if random.random() < self.prob:
            speed_alpha = 1.0 + self.speed_limit * random.uniform(-1, 1)
            pitch_alpha = self.pitch_limit * random.uniform(-1, 1) * 100  # in cents
            # https://github.com/carlthome/python-audio-effects/blob/master/pysndfx/dsp.py#L531
            with NamedTemporaryFile(suffix=".wav", dir=tempfile_dir) as temp_file:
                temp_filename = temp_file.name
                # always feed int16 to sox
                if wav.dtype == np.float32:
                    wav_int = float2int(wav)
                    wav_write(temp_filename, sr, wav_int)
                else:
                    wav_write(temp_filename, sr, wav)
                torchaudio.initialize_sox()
                effects = torchaudio.sox_effects.SoxEffectsChain()
                effects.append_effect_to_chain('pitch', pitch_alpha)
                effects.append_effect_to_chain('tempo', [speed_alpha])
                effects.append_effect_to_chain('rate', sr)
                effects.set_input_file(temp_filename)
                _wav, _sr = effects.sox_build_flow_effects()
                torchaudio.shutdown_sox()
                _wav = _wav.numpy().squeeze()
                assert sr == _sr
                # always float output
                if _wav.dtype == np.int16:
                    _wav = int2float(_wav)
    except Exception as e:
        print(str(e))
    if _wav is not None:
        return {'wav': _wav, 'sr': sr}
    else:
        return {'wav': wav, 'sr': sr}
def audiowrite(data, path, samplerate=16000, normalize=False, threaded=True):
    """
    Write the audio data ``data`` to the wav file ``path``

    The file can be written in a threaded mode. In this case, the writing
    process will be started as a separate thread. Consequently, the file will
    not yet be written when this function exits.

    :param data: A numpy array with the audio data
    :param path: The wav file the data should be written to
    :param samplerate: Sample rate of the audio data
    :param normalize: Normalize the audio first so that the values are within
        the range of [INTMIN, INTMAX], i.e. no clipping occurs
    :param threaded: If true, the write process will be started as a separate
        thread
    :return: The number of clipped samples
    """
    data = data.copy()
    int16_max = np.iinfo(np.int16).max
    int16_min = np.iinfo(np.int16).min

    if normalize:
        if not data.dtype.kind == 'f':
            data = data.astype(np.float64)
        data /= np.max(np.abs(data))

    if data.dtype.kind == 'f':
        data *= int16_max

    sample_to_clip = np.sum(data > int16_max)
    if sample_to_clip > 0:
        print('Warning, clipping {} samples'.format(sample_to_clip))
    data = np.clip(data, int16_min, int16_max)
    data = data.astype(np.int16)

    if threaded:
        threading.Thread(target=wav_write,
                         args=(path, samplerate, data)).start()
    else:
        wav_write(path, samplerate, data)
    return sample_to_clip
def evaluate_on_file(self, eval_file, save_dir, dset_X='X', dset_Y='Y'):
    """
    Arguments
        eval_file: HDF5 file containing the evaluation dataset
        save_dir: Directory to save the generated audio
        dset_X: Name of the features dataset inside the HDF5 file
        dset_Y: Name of the labels dataset inside the HDF5 file
    """
    save_file = os.path.join(
        save_dir, os.path.splitext(os.path.basename(eval_file))[0] + '.wav')
    X = hdf5matrix(eval_file, dset_X)
    Y = hdf5matrix(eval_file, dset_Y)
    audio = []
    data_gen = TimeseriesGenerator(X, Y, length=self.receptive_field,
                                   batch_size=1)
    steps = len(data_gen)
    for step in range(steps):
        batch_x, batch_y = data_gen[step]
        if step > 0:
            # feed back the previously generated sample as a one-hot input
            batch_x[0, -1, :256] = 0
            batch_x[0, -1, int(audio[-1])] = 1
        pred = self.model.predict(batch_x, batch_size=1, verbose=1)
        val = np.random.choice(range(256), p=pred[0])
        audio.append(val)

    def inverse_mu_law(quantized):
        # scale to [-1, 1]
        quantized = np.asarray(quantized).astype(np.float32)
        quantized /= 128
        quantized -= 1
        wav_scale = max(np.abs(np.iinfo(np.int16).min),
                        np.iinfo(np.int16).max)
        quantized *= wav_scale
        return quantized.astype(np.int16)

    gen_audio = inverse_mu_law(audio)
    wav_write(save_file, 16000, gen_audio)
os.environ['PATH'] = os.getcwd() + "/renode:" + os.environ['PATH']

# %% [markdown]
"""## Record 1 sec of audio after clicking the button"""

# %%
audio, sr = get_audio()

# %% [markdown]
"""## Convert audio to required format"""

# %%
from scipy.io.wavfile import write as wav_write
import pyaudioconvert as pac

wav_write('audio.wav', sr, audio)
# convert to 16 bit because Chrome seems to ignore recording settings
pac.convert_wav_to_16bit_mono('audio.wav', 'converted.wav')
sr, audio = wav_read('converted.wav')
audio.tofile('audio_bin')
!soxi converted.wav

# %% [markdown]
"""## Run a micro-speech example with a recorded audio sample in Renode"""

# %%
from pyrenode import *
shutdown_renode()
connect_renode()  # this sets up a log file and clears the simulation (just in case)
tell_renode('mach create')
def write_wav(self, name):
    """Save the waveform to a file.

    The bits-per-sample will be determined by the data type (mostly)."""
    wav_write(name, self._fs, self._value)
def make_burst(duration):
    pip_samples = math.ceil(fs * duration)
    # wave = (numpy.random.random(pip_samples) * 2) - 1
    wave = numpy.random.normal(size=pip_samples)
    window = signal_ramp(pip_samples, 5)
    wave = wave * window
    return wave


repeats = 10
silence = make_pip(0.25, 0) * 0
hp_filter = HighPassFilter(3000, fs, 6)
parts = []
for i in range(repeats):
    burst = make_burst(0.5)
    burst = hp_filter.run(burst)
    parts.append(burst)
    parts.append(silence)
sound = numpy.concatenate(parts)

pyplot.plot(sound)
pyplot.show()

wav_write('pip.wav', fs, sound)

# pip = make_pip(0.25, 1000)
# sound = numpy.concatenate((pip, silence))
def handle_microphone(obj):
    audio, sr = get_audio()
    wav_write('audio.wav', sr, audio)
    pac.convert_wav_to_16bit_mono('audio.wav', 'converted.wav')
    sr, audio = wav_read('converted.wav')
    audio.tofile('audio_bin')
def __call__(self, wav=None, sr=None):
    assert len(wav.shape) == 1
    _wav = None
    if random.random() < self.prob:
        codec = random.choice(self.sox_codec_list)
        quality = random.choice(self.quality_presets)
        sox_sr = random.choice(self.sox_sr_list)
        # suppress warnings: these codecs only support 8 kHz
        if codec in ['amr-nb', 'gsm']:
            sox_sr = 8
        with NamedTemporaryFile(suffix="." + codec, dir=tempfile_dir) as codec_temp_file:
            with NamedTemporaryFile(suffix=".wav", dir=tempfile_dir) as wav_temp_file:
                # save wav to disk, transform it using a codec,
                # then convert back to wav and read it
                codec_temp_filename = codec_temp_file.name
                wav_temp_filename = wav_temp_file.name
                if wav.dtype == np.float32:
                    wav_int = float2int(wav)
                    wav_write(wav_temp_filename, sr, wav_int)
                else:
                    wav_write(wav_temp_filename, sr, wav)
                sox_params = 'sox {} -r {}k -c 1 -C {} -t {} {}'.format(
                    wav_temp_filename, sox_sr, quality, codec,
                    codec_temp_filename)
                sox_reverse_params = 'sox {} -r {}k -c 1 -e signed-integer -t {} {}'.format(
                    codec_temp_filename, str(int(sr // 1000)), 'wav',
                    wav_temp_filename)
                _ = subprocess.Popen(sox_params, shell=True,
                                     stdout=subprocess.PIPE).stdout.read()
                _ = subprocess.Popen(sox_reverse_params, shell=True,
                                     stdout=subprocess.PIPE).stdout.read()
                _sr, _wav = wav_read(wav_temp_filename)
                assert _sr == sr
                if _wav.dtype == np.int16:
                    _wav = int2float(_wav)
    if _wav is not None:
        return {'wav': _wav, 'sr': sr}
    else:
        return {'wav': wav, 'sr': sr}
def create_words_template_fromwav(wav, wordlist=[], Fs=16000):
    frm_time_width = 0.02  # 20 ms
    analy_sta = "noinit"
    ana_frm_cnt = 0
    wav_data = np.array(wav)
    ana_frm_width = int(frm_time_width * Fs)
    ana_frm_step = ana_frm_width // 2
    noise_tmpt_frms = 6
    noise_fft_tmpt = np.zeros((noise_tmpt_frms, ana_frm_step))
    noise_freq_tmpt = np.zeros((noise_tmpt_frms, ana_frm_step))

    # back up the old parameter files
    wavfile = "./ResFiles/Wav/Word_voice_tmpt.wav"
    time_ymd_hms = time.strftime("_%Y%m%d_%H%M%S", time.localtime())
    new_wavfile = wavfile[0:-4] + time_ymd_hms + ".wav"
    para_file = wavfile[0:-4] + 'WordPara' + ".npz"
    new_para_file = para_file[0:-4] + time_ymd_hms + ".npz"
    shutil.copyfile(wavfile, new_wavfile)
    shutil.copyfile(para_file, new_para_file)

    write_data = np.array(wav_data, dtype='int16')
    wav_write(wavfile, Fs, write_data)
    wav_data = sigana.normalization_pn1(wav_data)

    # build the noise template from the first few frames
    while analy_sta == "noinit":
        ana_poi = ana_frm_cnt * ana_frm_step
        _frm = sigana.get_frame(wav_data, ana_poi, ana_frm_width)
        _nfft, _freq_y = sigana.calc_fft(_frm, Fs)
        noise_fft_tmpt[ana_frm_cnt] = _nfft
        noise_freq_tmpt[ana_frm_cnt] = _freq_y
        ana_frm_cnt += 1
        if ana_frm_cnt >= noise_tmpt_frms:
            analy_sta = "noise"
            break

    while True:
        ana_poi = ana_frm_cnt * ana_frm_step
        ana_frm_cnt += 1
        _end_poi = ana_poi + ana_frm_width
        if _end_poi > len(wav_data):
            analy_sta = "finished"
            break
        _frm = sigana.get_frame(wav_data, ana_poi, ana_frm_width)
        _sum, _judge = judge_voice_energy(_frm)
        if _judge == "voiless":
            # classified as unvoiced based on short-time energy
            if analy_sta != "voiless":
                if analy_sta == "noise":
                    create_new_voice_fileseg(ana_poi)
                analy_sta = "voiless"
                add_voi_ana_log(ana_poi, analy_sta)
            add_sum_log(0.5)
        elif _judge == "voied":
            # classified as voiced based on short-time energy
            if analy_sta != "voied":
                if analy_sta == "noise":
                    create_new_voice_fileseg(ana_poi)
                analy_sta = "voied"
                add_voi_ana_log(ana_poi, analy_sta)
            add_sum_log(1)
        else:
            # distinguish unvoiced sound from noise by the first-order difference
            _sum, _judge = judge_voiless_delta(_frm)
            if _judge == "noise":
                if analy_sta != "noise":
                    finish_of_voice_fileseg(ana_poi)
                    analy_sta = "noise"
                    add_voi_ana_log(ana_poi, analy_sta)
                add_sum_log(0.2)
            elif _judge == "voiless":
                if analy_sta != "voiless":
                    if analy_sta == "noise":
                        create_new_voice_fileseg(ana_poi)
                    analy_sta = "voiless"
                    add_voi_ana_log(ana_poi, analy_sta)
                add_sum_log(0.5)

        if analy_sta == "voiless":
            add_frm_stat_log(ana_poi, 0.25)
        elif analy_sta == "voied":
            add_frm_stat_log(ana_poi, 0.5)
        elif analy_sta == "noise":
            add_frm_stat_log(ana_poi, -0.15)

    print("len(g_word_arr): ", len(g_word_arr))
    print("g_word_idx: ", g_word_idx)

    # merge segments that are closer than MIN_WORD_SPACE samples into one word
    MIN_WORD_SPACE = 2400
    global g_check_word_idx
    global g_check_word_arr
    for i in range(len(g_word_arr)):
        if i == 0:
            _new_vfseg = VoiceFileSegInfo(wavfile)
            _new_vfseg.sta_poi = g_word_arr[i].sta_poi
            _new_vfseg.end_poi = g_word_arr[i].end_poi
            g_check_word_arr.append(_new_vfseg)
        if i >= 1:
            if (g_word_arr[i].sta_poi -
                    g_check_word_arr[g_check_word_idx].end_poi) >= MIN_WORD_SPACE:
                g_check_word_arr[g_check_word_idx].end_poi = g_word_arr[i - 1].end_poi
                # create a new vfseg for the new word
                _new_vfseg2 = VoiceFileSegInfo(wavfile)
                _new_vfseg2.sta_poi = g_word_arr[i].sta_poi
                _new_vfseg2.end_poi = g_word_arr[i].end_poi
                g_check_word_arr.append(_new_vfseg2)
                g_check_word_idx = g_check_word_idx + 1
            else:
                g_check_word_arr[g_check_word_idx].end_poi = g_word_arr[i].end_poi
        if i == len(g_word_arr) - 1:
            g_check_word_arr[g_check_word_idx].end_poi = g_word_arr[i].end_poi

    print("len(g_word_arr): ", len(g_word_arr))
    print("len(g_check_word_arr): ", len(g_check_word_arr))

    save_pars = []
    _word_n = len(g_check_word_arr)
    if _word_n > len(wordlist):
        _word_n = len(wordlist)
    for i in range(_word_n):
        _disp_data = wav_data[g_check_word_arr[i].sta_poi:g_check_word_arr[i].end_poi]
        if len(_disp_data) >= 2400:
            move_time = 0.01
            num_frms = int(len(_disp_data) / (16000 * move_time))
            _fft, _freq = sigana.cal_frames_fft_log(_disp_data * 10000, 16000,
                                                    t_start=0, t_window=0.02,
                                                    frame_n=num_frms,
                                                    move_step=move_time)
            _fft = sigana.normalization_array_pn1(_fft)
            save_pars.append(_fft)
            voice_para_save.save_word_voice_para(
                g_wordpara_arr[i], _fft,
                word=wordlist[i],
                file=wavfile,
                s_idx=g_check_word_arr[i].sta_poi,
                e_idx=g_check_word_arr[i].end_poi,
                time=0.0,
                ext_info="")
    para_file_name = wavfile[0:-4] + 'WordPara' + ".npz"
    np.savez(para_file_name, g_wordpara_arr)
def save_wav(save_path, f_s, wav):
    if isinstance(wav[0], np.float32):
        wav = np.asarray(np.multiply(wav, 32768.0), dtype=np.int16)
    wav_write(save_path, f_s, wav)
import sys

import numpy as np
from scipy.io.wavfile import write as wav_write

audio = np.empty((0, 1), dtype=np.uint8)
with open(sys.argv[1]) as f:
    for i, line in enumerate(f):
        if i == 0 or i == 1:  # skip the first two lines
            print(i)
            continue
        l = list(line)
        l.remove('\n')
        ll = np.packbits(np.array(l, dtype=np.uint8))
        audio = np.vstack((audio, ll.astype(np.uint8)))
audio = np.stack(audio, axis=1)[0]
wav_write("output.wav", data=audio, rate=int(sys.argv[2]))
def write_wav(self, filename):
    wav_write(filename, round(float(self.samplerate)), self.data)
def make_spike_wav(fname_spikes, fname_wav, nrn_idx=None, time_mode="real",
                   fname_spike_kernel=None, wav_dtype=np.int32, plot=False):
    """Create a wav audio file from the spike data

    Parameters
    ----------
    fname_spikes: string
        input spike data text filename
        first column is time
        subsequent columns are each neuron's spikes as would be recorded in
        the nengo simulator
    fname_wav: string
        filename of output wav data
    nrn_idx: list-like or None
        indices of neurons to use to generate the wav file
        if None, uses all neurons, one per wav file channel
    time_mode: "real" or "sim"
        whether to use the spikes' real or simulation time
    fname_spike_kernel: string or None
        if string, wav file name of a kernel to convolve the spikes with to
        generate different sounds
    wav_dtype: numpy dtype
        data type to be used in the wav file
    """
    assert isinstance(fname_spikes, str)
    assert isinstance(fname_wav, str)
    assert time_mode in ["real", "sim"]

    sim_time, measured_time, spk_data_raw = _load_spike_data(fname_spikes)
    if nrn_idx is not None:
        spk_data_raw = spk_data_raw[:, nrn_idx]
    n_samples, n_neurons = spk_data_raw.shape

    if time_mode == "real":
        time = measured_time
    elif time_mode == "sim":
        time = sim_time

    n_resamples = int(np.ceil(time[-1] * AUDIO_SAMPLE_RATE))
    spk_data = np.zeros((n_resamples, n_neurons))
    for nrn in range(n_neurons):
        spk_idx = np.nonzero(spk_data_raw[:, nrn])[0]
        spk_times = time[spk_idx]
        spk_resampled_idx = np.round(spk_times * AUDIO_SAMPLE_RATE).astype(int)
        spk_data[spk_resampled_idx, nrn] = AUDIO_SAMPLE_RATE  # impulse as 1/dt

    if fname_spike_kernel is not None:
        assert isinstance(fname_spike_kernel, str)
        spike_kernel_sample_rate, spk_kernel_data = wav_read(fname_spike_kernel)
        assert spike_kernel_sample_rate == AUDIO_SAMPLE_RATE, (
            "spike_kernel wav file sample rate must match AUDIO_SAMPLE_RATE")
        assert len(spk_kernel_data.shape) == 1, (
            "spike_kernel wav data must have a single channel")
        # find the peak of the kernel for later alignment
        shift_idx = np.argmax(np.abs(spk_kernel_data))
        spk_data_preconv = spk_data.copy()
        spk_data_postconv = np.zeros(
            (spk_data.shape[0] + len(spk_kernel_data) - 1, n_neurons))
        for nrn in range(n_neurons):
            spk_data_postconv[:, nrn] = convolve(
                spk_data[:, nrn], spk_kernel_data, mode="full")
        spk_data = spk_data_postconv
        spk_data = spk_data[shift_idx:]  # align with the original waveform
        spk_data = spk_data[:n_resamples]  # clip to the original length

    np.clip(spk_data, np.iinfo(wav_dtype).min, np.iinfo(wav_dtype).max,
            spk_data)
    spk_data = spk_data.astype(wav_dtype)
    wav_write(fname_wav, AUDIO_SAMPLE_RATE, spk_data)

    if plot:
        spk_data_fig = plt.figure()
        time_resampled = np.arange(n_resamples) * AUDIO_SAMPLE_DT
        if fname_spike_kernel is not None:
            ax = spk_data_fig.add_subplot(211)
            ax.plot(time_resampled, spk_data_preconv)
            ax.set_ylabel("raw spike waveform")
            ax.set_title("spike waveforms")
            ax = spk_data_fig.add_subplot(212, sharex=ax)
            ax.plot(time_resampled, spk_data)
            ax.set_ylabel("filtered spike waveform")
            ax.set_xlabel("time (s)")

            time_kernel = np.arange(len(spk_kernel_data)) * AUDIO_SAMPLE_DT
            spk_kernel_fig = plt.figure()
            ax = spk_kernel_fig.add_subplot(111)
            ax.plot(time_kernel, spk_kernel_data)
            ax.set_title("spike kernel waveform")
            ax.set_xlabel("time (s)")
        else:
            ax = spk_data_fig.add_subplot(111)
            ax.plot(spk_data)
        plt.show()
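# A minimal usage sketch for make_spike_wav (hypothetical file names; assumes
# AUDIO_SAMPLE_RATE, AUDIO_SAMPLE_DT, and the helpers referenced above are in
# scope):
if __name__ == '__main__':
    make_spike_wav('spikes.txt', 'spikes.wav', nrn_idx=[0, 1],
                   time_mode='sim', fname_spike_kernel=None, plot=False)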